SimplePie: PHP-based RSS and Atom feed handling
 
  • Overview
  • Demo
  • Blog
  • Download
  • Documentation
  • API Docs
  • Support
  • Issue Tracker
  • FAQ
  • Overview
  • Package
  • Class
  • Tree
  • Deprecated

Packages

  • SimplePie
    • API
    • Caching
    • HTTP
    • Parsing

Classes

  • SimplePie_Decode_HTML_Entities
  • SimplePie_Locator
  • SimplePie_Misc
  • SimplePie_Registry
  • SimplePie_Sanitize
  1: <?php
  2: /**
  3:  * SimplePie
  4:  *
  5:  * A PHP-Based RSS and Atom Feed Framework.
  6:  * Takes the hard work out of managing a complete RSS/Atom solution.
  7:  *
  8:  * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
  9:  * All rights reserved.
 10:  *
 11:  * Redistribution and use in source and binary forms, with or without modification, are
 12:  * permitted provided that the following conditions are met:
 13:  *
 14:  *  * Redistributions of source code must retain the above copyright notice, this list of
 15:  *    conditions and the following disclaimer.
 16:  *
 17:  *  * Redistributions in binary form must reproduce the above copyright notice, this list
 18:  *    of conditions and the following disclaimer in the documentation and/or other materials
 19:  *    provided with the distribution.
 20:  *
 21:  *  * Neither the name of the SimplePie Team nor the names of its contributors may be used
 22:  *    to endorse or promote products derived from this software without specific prior
 23:  *    written permission.
 24:  *
 25:  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 26:  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 27:  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
 28:  * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 29:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 30:  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 31:  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 32:  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 33:  * POSSIBILITY OF SUCH DAMAGE.
 34:  *
 35:  * @package SimplePie
 36:  * @version 1.3
 37:  * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
 38:  * @author Ryan Parman
 39:  * @author Geoffrey Sneddon
 40:  * @author Ryan McCue
 41:  * @link http://simplepie.org/ SimplePie
 42:  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
 43:  */
 44: 
 45: /**
 46:  * Used for feed auto-discovery
 47:  *
 48:  *
 49:  * This class can be overloaded with {@see SimplePie::set_locator_class()}
 50:  *
 51:  * @package SimplePie
 52:  */
 53: class SimplePie_Locator
 54: {
 55:     var $useragent;
 56:     var $timeout;
 57:     var $file;
 58:     var $local = array();
 59:     var $elsewhere = array();
 60:     var $cached_entities = array();
 61:     var $http_base;
 62:     var $base;
 63:     var $base_location = 0;
 64:     var $checked_feeds = 0;
 65:     var $max_checked_feeds = 10;
 66:     protected $registry;
 67: 
 68:     public function __construct(SimplePie_File $file, $timeout = 10, $useragent = null, $max_checked_feeds = 10)
 69:     {
 70:         $this->file = $file;
 71:         $this->useragent = $useragent;
 72:         $this->timeout = $timeout;
 73:         $this->max_checked_feeds = $max_checked_feeds;
 74: 
 75:         $this->dom = new DOMDocument();
 76: 
 77:         set_error_handler(array('SimplePie_Misc', 'silence_errors'));
 78:         $this->dom->loadHTML($this->file->body);
 79:         restore_error_handler();
 80:     }
 81: 
 82:     public function set_registry(SimplePie_Registry $registry)
 83:     {
 84:         $this->registry = $registry;
 85:     }
 86: 
 87:     public function find($type = SIMPLEPIE_LOCATOR_ALL, &$working)
 88:     {
 89:         if ($this->is_feed($this->file))
 90:         {
 91:             return $this->file;
 92:         }
 93: 
 94:         if ($this->file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
 95:         {
 96:             $sniffer = $this->registry->create('Content_Type_Sniffer', array($this->file));
 97:             if ($sniffer->get_type() !== 'text/html')
 98:             {
 99:                 return null;
100:             }
101:         }
102: 
103:         if ($type & ~SIMPLEPIE_LOCATOR_NONE)
104:         {
105:             $this->get_base();
106:         }
107: 
108:         if ($type & SIMPLEPIE_LOCATOR_AUTODISCOVERY && $working = $this->autodiscovery())
109:         {
110:             return $working[0];
111:         }
112: 
113:         if ($type & (SIMPLEPIE_LOCATOR_LOCAL_EXTENSION | SIMPLEPIE_LOCATOR_LOCAL_BODY | SIMPLEPIE_LOCATOR_REMOTE_EXTENSION | SIMPLEPIE_LOCATOR_REMOTE_BODY) && $this->get_links())
114:         {
115:             if ($type & SIMPLEPIE_LOCATOR_LOCAL_EXTENSION && $working = $this->extension($this->local))
116:             {
117:                 return $working;
118:             }
119: 
120:             if ($type & SIMPLEPIE_LOCATOR_LOCAL_BODY && $working = $this->body($this->local))
121:             {
122:                 return $working;
123:             }
124: 
125:             if ($type & SIMPLEPIE_LOCATOR_REMOTE_EXTENSION && $working = $this->extension($this->elsewhere))
126:             {
127:                 return $working;
128:             }
129: 
130:             if ($type & SIMPLEPIE_LOCATOR_REMOTE_BODY && $working = $this->body($this->elsewhere))
131:             {
132:                 return $working;
133:             }
134:         }
135:         return null;
136:     }
137: 
138:     public function is_feed($file)
139:     {
140:         if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
141:         {
142:             $sniffer = $this->registry->create('Content_Type_Sniffer', array($file));
143:             $sniffed = $sniffer->get_type();
144:             if (in_array($sniffed, array('application/rss+xml', 'application/rdf+xml', 'text/rdf', 'application/atom+xml', 'text/xml', 'application/xml')))
145:             {
146:                 return true;
147:             }
148:             else
149:             {
150:                 return false;
151:             }
152:         }
153:         elseif ($file->method & SIMPLEPIE_FILE_SOURCE_LOCAL)
154:         {
155:             return true;
156:         }
157:         else
158:         {
159:             return false;
160:         }
161:     }
162: 
163:     public function get_base()
164:     {
165:         $this->http_base = $this->file->url;
166:         $this->base = $this->http_base;
167:         $elements = $this->dom->getElementsByTagName('base');
168:         foreach ($elements as $element)
169:         {
170:             if ($element->hasAttribute('href'))
171:             {
172:                 $this->base = $this->registry->call('Misc', 'absolutize_url', array(trim($element->getAttribute('href')), $this->http_base));
173:                 $this->base_location = method_exists($element, 'getLineNo') ? $element->getLineNo() : 0;
174:                 break;
175:             }
176:         }
177:     }
178: 
179:     public function autodiscovery()
180:     {
181:         $done = array();
182:         $feeds = array();
183:         $feeds = array_merge($feeds, $this->search_elements_by_tag('link', $done, $feeds));
184:         $feeds = array_merge($feeds, $this->search_elements_by_tag('a', $done, $feeds));
185:         $feeds = array_merge($feeds, $this->search_elements_by_tag('area', $done, $feeds));
186: 
187:         if (!empty($feeds))
188:         {
189:             return array_values($feeds);
190:         }
191:         else
192:         {
193:             return null;
194:         }
195:     }
196: 
197:     protected function search_elements_by_tag($name, &$done, $feeds)
198:     {
199:         $links = $this->dom->getElementsByTagName($name);
200:         foreach ($links as $link)
201:         {
202:             if ($this->checked_feeds === $this->max_checked_feeds)
203:             {
204:                 break;
205:             }
206:             if ($link->hasAttribute('href') && $link->hasAttribute('rel'))
207:             {
208:                 $rel = array_unique($this->registry->call('Misc', 'space_seperated_tokens', array(strtolower($link->getAttribute('rel')))));
209:                 $line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1;
210: 
211:                 if ($this->base_location < $line)
212:                 {
213:                     $href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->base));
214:                 }
215:                 else
216:                 {
217:                     $href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->http_base));
218:                 }
219: 
220:                 if (!in_array($href, $done) && in_array('feed', $rel) || (in_array('alternate', $rel) && !in_array('stylesheet', $rel) && $link->hasAttribute('type') && in_array(strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type')))), array('application/rss+xml', 'application/atom+xml'))) && !isset($feeds[$href]))
221:                 {
222:                     $this->checked_feeds++;
223:                     $headers = array(
224:                         'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
225:                     );
226:                     $feed = $this->registry->create('File', array($href, $this->timeout, 5, $headers, $this->useragent));
227:                     if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
228:                     {
229:                         $feeds[$href] = $feed;
230:                     }
231:                 }
232:                 $done[] = $href;
233:             }
234:         }
235: 
236:         return $feeds;
237:     }
238: 
239:     public function get_links()
240:     {
241:         $links = $this->dom->getElementsByTagName('a');
242:         foreach ($links as $link)
243:         {
244:             if ($link->hasAttribute('href'))
245:             {
246:                 $href = trim($link->getAttribute('href'));
247:                 $parsed = $this->registry->call('Misc', 'parse_url', array($href));
248:                 if ($parsed['scheme'] === '' || preg_match('/^(http(s)|feed)?$/i', $parsed['scheme']))
249:                 {
250:                     if ($this->base_location < $link->getLineNo())
251:                     {
252:                         $href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->base));
253:                     }
254:                     else
255:                     {
256:                         $href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->http_base));
257:                     }
258: 
259:                     $current = $this->registry->call('Misc', 'parse_url', array($this->file->url));
260: 
261:                     if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority'])
262:                     {
263:                         $this->local[] = $href;
264:                     }
265:                     else
266:                     {
267:                         $this->elsewhere[] = $href;
268:                     }
269:                 }
270:             }
271:         }
272:         $this->local = array_unique($this->local);
273:         $this->elsewhere = array_unique($this->elsewhere);
274:         if (!empty($this->local) || !empty($this->elsewhere))
275:         {
276:             return true;
277:         }
278:         return null;
279:     }
280: 
281:     public function extension(&$array)
282:     {
283:         foreach ($array as $key => $value)
284:         {
285:             if ($this->checked_feeds === $this->max_checked_feeds)
286:             {
287:                 break;
288:             }
289:             if (in_array(strtolower(strrchr($value, '.')), array('.rss', '.rdf', '.atom', '.xml')))
290:             {
291:                 $this->checked_feeds++;
292: 
293:                 $headers = array(
294:                     'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
295:                 );
296:                 $feed = $this->registry->create('File', array($value, $this->timeout, 5, $headers, $this->useragent));
297:                 if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
298:                 {
299:                     return $feed;
300:                 }
301:                 else
302:                 {
303:                     unset($array[$key]);
304:                 }
305:             }
306:         }
307:         return null;
308:     }
309: 
310:     public function body(&$array)
311:     {
312:         foreach ($array as $key => $value)
313:         {
314:             if ($this->checked_feeds === $this->max_checked_feeds)
315:             {
316:                 break;
317:             }
318:             if (preg_match('/(rss|rdf|atom|xml)/i', $value))
319:             {
320:                 $this->checked_feeds++;
321:                 $headers = array(
322:                     'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
323:                 );
324:                 $feed = $this->registry->create('File', array($value, $this->timeout, 5, null, $this->useragent));
325:                 if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
326:                 {
327:                     return $feed;
328:                 }
329:                 else
330:                 {
331:                     unset($array[$key]);
332:                 }
333:             }
334:         }
335:         return null;
336:     }
337: }
338: 
339: 

Show some love! Wishlists for Geoffrey, Ryan P., and Ryan M.

SimplePie is © 2004–2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue and contributors. Licensed under the BSD License. Hosted thanks to Matt Mullenweg, API documentation generated by ApiGen 2.6.1. Variation on the Feed Icon by Wolfgang Bartelme.