1: <?php
2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43:
44:
45: 46: 47: 48: 49: 50: 51: 52:
/**
 * Locates a feed starting from an already-fetched document.
 *
 * The document is either recognised as a feed itself, or parsed as HTML and
 * searched for feed candidates: first via `rel` based autodiscovery on
 * <link>, <a> and <area> elements, then by following plain <a> links whose
 * URL looks like a feed. Every candidate that is fetched counts against
 * $max_checked_feeds.
 */
class SimplePie_Locator
{
    /** User agent handed to every File created while probing candidates. */
    public $useragent;

    /** Timeout handed to every File created while probing candidates. */
    public $timeout;

    /** The document the search starts from (a SimplePie_File). */
    public $file;

    /** Candidate links sharing the authority of the starting document. */
    public $local = array();

    /** Candidate links pointing at a different authority. */
    public $elsewhere = array();

    public $cached_entities = array();

    /** URL of the starting document; used for links placed before <base>. */
    public $http_base;

    /** Effective base URL, i.e. $http_base unless a <base href> overrides it. */
    public $base;

    /** Source line of the <base> element that set $base, or 0 if none/unknown. */
    public $base_location = 0;

    /** Number of candidate URLs fetched so far. */
    public $checked_feeds = 0;

    /** Searching stops once $checked_feeds reaches this value. */
    public $max_checked_feeds = 10;

    /** Parsed DOM of the starting document (empty when the body is empty). */
    public $dom;

    /** Registry used to create helper objects and call Misc helpers. */
    protected $registry;

    /**
     * Stores the settings and parses the body of $file as HTML.
     *
     * @param SimplePie_File $file Document to search for feeds
     * @param int $timeout Timeout passed on to candidate fetches
     * @param string|null $useragent User agent passed on to candidate fetches
     * @param int $max_checked_feeds Maximum number of candidates to fetch
     */
    public function __construct(SimplePie_File $file, $timeout = 10, $useragent = null, $max_checked_feeds = 10)
    {
        $this->file = $file;
        $this->useragent = $useragent;
        $this->timeout = $timeout;
        $this->max_checked_feeds = $max_checked_feeds;

        $this->dom = new DOMDocument();

        // DOMDocument::loadHTML() rejects an empty source (a warning on
        // PHP 7, a ValueError on PHP 8). An empty document simply yields no
        // elements, which is exactly what the search methods need.
        $body = $this->file->body;
        if (is_string($body) && $body !== '')
        {
            // Real-world HTML is rarely valid; parser diagnostics are routed
            // to SimplePie_Misc::silence_errors while loading.
            set_error_handler(array('SimplePie_Misc', 'silence_errors'));
            $this->dom->loadHTML($body);
            restore_error_handler();
        }
    }

    /**
     * Injects the registry. Must be called before any search method is used.
     *
     * @param SimplePie_Registry $registry
     */
    public function set_registry(SimplePie_Registry $registry)
    {
        $this->registry = $registry;
    }

    /**
     * Runs the discovery strategies selected by $type, cheapest first.
     *
     * @param int $type Bitmask of SIMPLEPIE_LOCATOR_* constants
     * @param mixed $working Receives the result of the last strategy tried
     *                       (for autodiscovery: the list of all feeds found)
     * @return object|null The starting file if it is a feed itself, the first
     *                     working candidate otherwise, or null if none found
     */
    public function find($type = SIMPLEPIE_LOCATOR_ALL, &$working = null)
    {
        if ($this->is_feed($this->file))
        {
            return $this->file;
        }

        // A remote document that is not HTML cannot be searched for links.
        if ($this->file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
        {
            $sniffer = $this->registry->create('Content_Type_Sniffer', array($this->file));
            if ($sniffer->get_type() !== 'text/html')
            {
                return null;
            }
        }

        if ($type & ~SIMPLEPIE_LOCATOR_NONE)
        {
            $this->get_base();
        }

        if (($type & SIMPLEPIE_LOCATOR_AUTODISCOVERY) && ($working = $this->autodiscovery()))
        {
            return $working[0];
        }

        $link_strategies = SIMPLEPIE_LOCATOR_LOCAL_EXTENSION | SIMPLEPIE_LOCATOR_LOCAL_BODY | SIMPLEPIE_LOCATOR_REMOTE_EXTENSION | SIMPLEPIE_LOCATOR_REMOTE_BODY;
        if (($type & $link_strategies) && $this->get_links())
        {
            if (($type & SIMPLEPIE_LOCATOR_LOCAL_EXTENSION) && ($working = $this->extension($this->local)))
            {
                return $working;
            }

            if (($type & SIMPLEPIE_LOCATOR_LOCAL_BODY) && ($working = $this->body($this->local)))
            {
                return $working;
            }

            if (($type & SIMPLEPIE_LOCATOR_REMOTE_EXTENSION) && ($working = $this->extension($this->elsewhere)))
            {
                return $working;
            }

            if (($type & SIMPLEPIE_LOCATOR_REMOTE_BODY) && ($working = $this->body($this->elsewhere)))
            {
                return $working;
            }
        }

        return null;
    }

    /**
     * Decides whether a fetched file should be treated as a feed.
     *
     * Remote files are judged by their sniffed content type; files flagged
     * as local are always accepted; anything else is rejected.
     *
     * @param object $file File object exposing a `method` bitmask
     * @return bool
     */
    public function is_feed($file)
    {
        if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
        {
            $sniffer = $this->registry->create('Content_Type_Sniffer', array($file));
            $sniffed = $sniffer->get_type();
            $feed_types = array('application/rss+xml', 'application/rdf+xml', 'text/rdf', 'application/atom+xml', 'text/xml', 'application/xml');

            return in_array($sniffed, $feed_types, true);
        }

        return (bool) ($file->method & SIMPLEPIE_FILE_SOURCE_LOCAL);
    }

    /**
     * Determines the base URL used to resolve relative links.
     *
     * Starts from the URL of the document and lets the first <base> element
     * carrying an href override it. The line of that element is remembered so
     * that links appearing before it are still resolved against the document
     * URL.
     */
    public function get_base()
    {
        $this->http_base = $this->file->url;
        $this->base = $this->http_base;
        $elements = $this->dom->getElementsByTagName('base');
        foreach ($elements as $element)
        {
            if ($element->hasAttribute('href'))
            {
                $this->base = $this->registry->call('Misc', 'absolutize_url', array(trim($element->getAttribute('href')), $this->http_base));
                $this->base_location = method_exists($element, 'getLineNo') ? $element->getLineNo() : 0;
                break;
            }
        }
    }

    /**
     * Collects every feed advertised through `rel` attributes.
     *
     * @return array|null List of working feed files, or null if none found
     */
    public function autodiscovery()
    {
        $done = array();
        $feeds = array();
        $feeds = array_merge($feeds, $this->search_elements_by_tag('link', $done, $feeds));
        $feeds = array_merge($feeds, $this->search_elements_by_tag('a', $done, $feeds));
        $feeds = array_merge($feeds, $this->search_elements_by_tag('area', $done, $feeds));

        if (!empty($feeds))
        {
            return array_values($feeds);
        }

        return null;
    }

    /**
     * Probes the elements named $name that advertise a feed via `rel`.
     *
     * An element qualifies when its rel contains "feed", or contains
     * "alternate" (but not "stylesheet") together with an RSS/Atom type.
     *
     * @param string $name Tag name to inspect
     * @param array $done URLs already looked at; extended in place
     * @param array $feeds Feeds found so far, keyed by URL
     * @return array $feeds extended with the feeds found by this call
     */
    protected function search_elements_by_tag($name, &$done, $feeds)
    {
        $links = $this->dom->getElementsByTagName($name);
        foreach ($links as $link)
        {
            if ($this->checked_feeds === $this->max_checked_feeds)
            {
                break;
            }
            if (!$link->hasAttribute('href') || !$link->hasAttribute('rel'))
            {
                continue;
            }

            $rel = array_unique($this->registry->call('Misc', 'space_seperated_tokens', array(strtolower($link->getAttribute('rel')))));
            $href = $this->absolutize_link($link);

            // Both "not seen yet" guards apply to either kind of rel; without
            // explicit grouping each guard only covered one of the two arms,
            // so an already-rejected "alternate" URL was fetched again.
            $is_new = !in_array($href, $done, true) && !isset($feeds[$href]);
            if ($is_new && $this->advertises_feed($link, $rel))
            {
                $this->checked_feeds++;
                $feed = $this->fetch_feed($href);
                if ($feed !== null)
                {
                    $feeds[$href] = $feed;
                }
            }
            $done[] = $href;
        }

        return $feeds;
    }

    /**
     * Gathers the http(s)/feed links of all <a> elements, split into
     * $this->local (same authority as the document, or no authority at all)
     * and $this->elsewhere.
     *
     * @return true|null True when at least one link was collected, else null
     */
    public function get_links()
    {
        // Authority of the starting document; computed on first use only.
        $current = null;

        $links = $this->dom->getElementsByTagName('a');
        foreach ($links as $link)
        {
            if (!$link->hasAttribute('href'))
            {
                continue;
            }

            $href = trim($link->getAttribute('href'));
            $parsed = $this->registry->call('Misc', 'parse_url', array($href));

            // Only scheme-less links and http, https or feed URLs qualify.
            if ($parsed['scheme'] !== '' && !preg_match('/^(https?|feed)?$/i', $parsed['scheme']))
            {
                continue;
            }

            $href = $this->absolutize_link($link);

            if ($current === null)
            {
                $current = $this->registry->call('Misc', 'parse_url', array($this->file->url));
            }

            if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority'])
            {
                $this->local[] = $href;
            }
            else
            {
                $this->elsewhere[] = $href;
            }
        }

        $this->local = array_unique($this->local);
        $this->elsewhere = array_unique($this->elsewhere);
        if (!empty($this->local) || !empty($this->elsewhere))
        {
            return true;
        }

        return null;
    }

    /**
     * Tries the URLs whose text after the last dot is a feed-like extension
     * (.rss, .rdf, .atom, .xml).
     *
     * @param array $array Candidate URLs; URLs that were fetched and rejected
     *                     are removed in place
     * @return object|null First working feed file, or null
     */
    public function extension(&$array)
    {
        foreach ($array as $key => $value)
        {
            if ($this->checked_feeds === $this->max_checked_feeds)
            {
                break;
            }

            $extension = strrchr($value, '.');
            if ($extension !== false && in_array(strtolower($extension), array('.rss', '.rdf', '.atom', '.xml'), true))
            {
                $this->checked_feeds++;

                $feed = $this->fetch_feed($value);
                if ($feed !== null)
                {
                    return $feed;
                }
                unset($array[$key]);
            }
        }

        return null;
    }

    /**
     * Tries the URLs that contain "rss", "rdf", "atom" or "xml" anywhere.
     *
     * @param array $array Candidate URLs; URLs that were fetched and rejected
     *                     are removed in place
     * @return object|null First working feed file, or null
     */
    public function body(&$array)
    {
        foreach ($array as $key => $value)
        {
            if ($this->checked_feeds === $this->max_checked_feeds)
            {
                break;
            }

            if (preg_match('/(rss|rdf|atom|xml)/i', $value))
            {
                $this->checked_feeds++;

                $feed = $this->fetch_feed($value);
                if ($feed !== null)
                {
                    return $feed;
                }
                unset($array[$key]);
            }
        }

        return null;
    }

    /**
     * Resolves the href of $link against the base in effect at its position:
     * $this->base when the link comes after the <base> element, otherwise the
     * document URL. Without line information the link counts as line 1.
     */
    private function absolutize_link($link)
    {
        $line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1;
        $base = ($this->base_location < $line) ? $this->base : $this->http_base;

        return $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $base));
    }

    /**
     * Tells whether the rel tokens (and type attribute) of $link mark it as a
     * feed reference.
     */
    private function advertises_feed($link, $rel)
    {
        if (in_array('feed', $rel, true))
        {
            return true;
        }

        if (!in_array('alternate', $rel, true) || in_array('stylesheet', $rel, true) || !$link->hasAttribute('type'))
        {
            return false;
        }

        $mime = strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type'))));

        return in_array($mime, array('application/rss+xml', 'application/atom+xml'), true);
    }

    /**
     * Fetches $url with a feed-friendly Accept header and returns the file if
     * it is usable as a feed, null otherwise. Does not touch $checked_feeds.
     */
    private function fetch_feed($url)
    {
        $headers = array(
            'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
        );
        $feed = $this->registry->create('File', array($url, $this->timeout, 5, $headers, $this->useragent));

        if (!$feed->success)
        {
            return null;
        }

        // The bit test needs its own parentheses: `===` binds tighter than
        // `&`, so `$a & B === 0` would always evaluate to 0.
        // Non-remote sources carry no HTTP status and skip the status check.
        $is_remote = ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE) !== 0;
        if ($is_remote)
        {
            // Accepted statuses: 200 and 207-299 (201-206 are rejected).
            $status_ok = $feed->status_code === 200 || ($feed->status_code > 206 && $feed->status_code < 300);
            if (!$status_ok)
            {
                return null;
            }
        }

        return $this->is_feed($feed) ? $feed : null;
    }
}
338:
339: