SimplePie: PHP-based RSS and Atom feed handling
 
  • Overview
  • Demo
  • Blog
  • Download
  • Documentation
  • API Docs
  • Support
  • Issue Tracker
  • FAQ
  • Overview
  • Package
  • Class
  • Tree
  • Deprecated

Packages

  • SimplePie
    • API
    • Caching
    • HTTP
    • Parsing

Classes

  • SimplePie_Parse_Date
  • SimplePie_Parser
  • SimplePie_XML_Declaration_Parser
  1: <?php
  2: /**
  3:  * SimplePie
  4:  *
  5:  * A PHP-Based RSS and Atom Feed Framework.
  6:  * Takes the hard work out of managing a complete RSS/Atom solution.
  7:  *
  8:  * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
  9:  * All rights reserved.
 10:  *
 11:  * Redistribution and use in source and binary forms, with or without modification, are
 12:  * permitted provided that the following conditions are met:
 13:  *
 14:  *  * Redistributions of source code must retain the above copyright notice, this list of
 15:  *    conditions and the following disclaimer.
 16:  *
 17:  *  * Redistributions in binary form must reproduce the above copyright notice, this list
 18:  *    of conditions and the following disclaimer in the documentation and/or other materials
 19:  *    provided with the distribution.
 20:  *
 21:  *  * Neither the name of the SimplePie Team nor the names of its contributors may be used
 22:  *    to endorse or promote products derived from this software without specific prior
 23:  *    written permission.
 24:  *
 25:  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 26:  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 27:  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
 28:  * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 29:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 30:  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 31:  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 32:  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 33:  * POSSIBILITY OF SUCH DAMAGE.
 34:  *
 35:  * @package SimplePie
 36:  * @version 1.3
 37:  * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
 38:  * @author Ryan Parman
 39:  * @author Geoffrey Sneddon
 40:  * @author Ryan McCue
 41:  * @link http://simplepie.org/ SimplePie
 42:  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
 43:  */
 44: 
 45: /**
 46:  * Parses XML into something sane
 47:  *
 48:  *
 49:  * This class can be overloaded with {@see SimplePie::set_parser_class()}
 50:  *
 51:  * @package SimplePie
 52:  * @subpackage Parsing
 53:  */
 54: class SimplePie_Parser
 55: {
 56:     var $error_code;
 57:     var $error_string;
 58:     var $current_line;
 59:     var $current_column;
 60:     var $current_byte;
 61:     var $separator = ' ';
 62:     var $namespace = array('');
 63:     var $element = array('');
 64:     var $xml_base = array('');
 65:     var $xml_base_explicit = array(false);
 66:     var $xml_lang = array('');
 67:     var $data = array();
 68:     var $datas = array(array());
 69:     var $current_xhtml_construct = -1;
 70:     var $encoding;
 71:     protected $registry;
 72: 
 73:     public function set_registry(SimplePie_Registry $registry)
 74:     {
 75:         $this->registry = $registry;
 76:     }
 77: 
 78:     public function parse(&$data, $encoding)
 79:     {
 80:         // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
 81:         if (strtoupper($encoding) === 'US-ASCII')
 82:         {
 83:             $this->encoding = 'UTF-8';
 84:         }
 85:         else
 86:         {
 87:             $this->encoding = $encoding;
 88:         }
 89: 
 90:         // Strip BOM:
 91:         // UTF-32 Big Endian BOM
 92:         if (substr($data, 0, 4) === "\x00\x00\xFE\xFF")
 93:         {
 94:             $data = substr($data, 4);
 95:         }
 96:         // UTF-32 Little Endian BOM
 97:         elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00")
 98:         {
 99:             $data = substr($data, 4);
100:         }
101:         // UTF-16 Big Endian BOM
102:         elseif (substr($data, 0, 2) === "\xFE\xFF")
103:         {
104:             $data = substr($data, 2);
105:         }
106:         // UTF-16 Little Endian BOM
107:         elseif (substr($data, 0, 2) === "\xFF\xFE")
108:         {
109:             $data = substr($data, 2);
110:         }
111:         // UTF-8 BOM
112:         elseif (substr($data, 0, 3) === "\xEF\xBB\xBF")
113:         {
114:             $data = substr($data, 3);
115:         }
116: 
117:         if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false)
118:         {
119:             $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
120:             if ($declaration->parse())
121:             {
122:                 $data = substr($data, $pos + 2);
123:                 $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data;
124:             }
125:             else
126:             {
127:                 $this->error_string = 'SimplePie bug! Please report this!';
128:                 return false;
129:             }
130:         }
131: 
132:         $return = true;
133: 
134:         static $xml_is_sane = null;
135:         if ($xml_is_sane === null)
136:         {
137:             $parser_check = xml_parser_create();
138:             xml_parse_into_struct($parser_check, '<foo>&amp;</foo>', $values);
139:             xml_parser_free($parser_check);
140:             $xml_is_sane = isset($values[0]['value']);
141:         }
142: 
143:         // Create the parser
144:         if ($xml_is_sane)
145:         {
146:             $xml = xml_parser_create_ns($this->encoding, $this->separator);
147:             xml_parser_set_option($xml, XML_OPTION_SKIP_WHITE, 1);
148:             xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, 0);
149:             xml_set_object($xml, $this);
150:             xml_set_character_data_handler($xml, 'cdata');
151:             xml_set_element_handler($xml, 'tag_open', 'tag_close');
152: 
153:             // Parse!
154:             if (!xml_parse($xml, $data, true))
155:             {
156:                 $this->error_code = xml_get_error_code($xml);
157:                 $this->error_string = xml_error_string($this->error_code);
158:                 $return = false;
159:             }
160:             $this->current_line = xml_get_current_line_number($xml);
161:             $this->current_column = xml_get_current_column_number($xml);
162:             $this->current_byte = xml_get_current_byte_index($xml);
163:             xml_parser_free($xml);
164:             return $return;
165:         }
166:         else
167:         {
168:             libxml_clear_errors();
169:             $xml = new XMLReader();
170:             $xml->xml($data);
171:             while (@$xml->read())
172:             {
173:                 switch ($xml->nodeType)
174:                 {
175: 
176:                     case constant('XMLReader::END_ELEMENT'):
177:                         if ($xml->namespaceURI !== '')
178:                         {
179:                             $tagName = $xml->namespaceURI . $this->separator . $xml->localName;
180:                         }
181:                         else
182:                         {
183:                             $tagName = $xml->localName;
184:                         }
185:                         $this->tag_close(null, $tagName);
186:                         break;
187:                     case constant('XMLReader::ELEMENT'):
188:                         $empty = $xml->isEmptyElement;
189:                         if ($xml->namespaceURI !== '')
190:                         {
191:                             $tagName = $xml->namespaceURI . $this->separator . $xml->localName;
192:                         }
193:                         else
194:                         {
195:                             $tagName = $xml->localName;
196:                         }
197:                         $attributes = array();
198:                         while ($xml->moveToNextAttribute())
199:                         {
200:                             if ($xml->namespaceURI !== '')
201:                             {
202:                                 $attrName = $xml->namespaceURI . $this->separator . $xml->localName;
203:                             }
204:                             else
205:                             {
206:                                 $attrName = $xml->localName;
207:                             }
208:                             $attributes[$attrName] = $xml->value;
209:                         }
210:                         $this->tag_open(null, $tagName, $attributes);
211:                         if ($empty)
212:                         {
213:                             $this->tag_close(null, $tagName);
214:                         }
215:                         break;
216:                     case constant('XMLReader::TEXT'):
217: 
218:                     case constant('XMLReader::CDATA'):
219:                         $this->cdata(null, $xml->value);
220:                         break;
221:                 }
222:             }
223:             if ($error = libxml_get_last_error())
224:             {
225:                 $this->error_code = $error->code;
226:                 $this->error_string = $error->message;
227:                 $this->current_line = $error->line;
228:                 $this->current_column = $error->column;
229:                 return false;
230:             }
231:             else
232:             {
233:                 return true;
234:             }
235:         }
236:     }
237: 
238:     public function get_error_code()
239:     {
240:         return $this->error_code;
241:     }
242: 
243:     public function get_error_string()
244:     {
245:         return $this->error_string;
246:     }
247: 
248:     public function get_current_line()
249:     {
250:         return $this->current_line;
251:     }
252: 
253:     public function get_current_column()
254:     {
255:         return $this->current_column;
256:     }
257: 
258:     public function get_current_byte()
259:     {
260:         return $this->current_byte;
261:     }
262: 
263:     public function get_data()
264:     {
265:         return $this->data;
266:     }
267: 
268:     public function tag_open($parser, $tag, $attributes)
269:     {
270:         list($this->namespace[], $this->element[]) = $this->split_ns($tag);
271: 
272:         $attribs = array();
273:         foreach ($attributes as $name => $value)
274:         {
275:             list($attrib_namespace, $attribute) = $this->split_ns($name);
276:             $attribs[$attrib_namespace][$attribute] = $value;
277:         }
278: 
279:         if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['base']))
280:         {
281:             $this->xml_base[] = $this->registry->call('Misc', 'absolutize_url', array($attribs[SIMPLEPIE_NAMESPACE_XML]['base'], end($this->xml_base)));
282:             $this->xml_base_explicit[] = true;
283:         }
284:         else
285:         {
286:             $this->xml_base[] = end($this->xml_base);
287:             $this->xml_base_explicit[] = end($this->xml_base_explicit);
288:         }
289: 
290:         if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['lang']))
291:         {
292:             $this->xml_lang[] = $attribs[SIMPLEPIE_NAMESPACE_XML]['lang'];
293:         }
294:         else
295:         {
296:             $this->xml_lang[] = end($this->xml_lang);
297:         }
298: 
299:         if ($this->current_xhtml_construct >= 0)
300:         {
301:             $this->current_xhtml_construct++;
302:             if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML)
303:             {
304:                 $this->data['data'] .= '<' . end($this->element);
305:                 if (isset($attribs['']))
306:                 {
307:                     foreach ($attribs[''] as $name => $value)
308:                     {
309:                         $this->data['data'] .= ' ' . $name . '="' . htmlspecialchars($value, ENT_COMPAT, $this->encoding) . '"';
310:                     }
311:                 }
312:                 $this->data['data'] .= '>';
313:             }
314:         }
315:         else
316:         {
317:             $this->datas[] =& $this->data;
318:             $this->data =& $this->data['child'][end($this->namespace)][end($this->element)][];
319:             $this->data = array('data' => '', 'attribs' => $attribs, 'xml_base' => end($this->xml_base), 'xml_base_explicit' => end($this->xml_base_explicit), 'xml_lang' => end($this->xml_lang));
320:             if ((end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_03 && in_array(end($this->element), array('title', 'tagline', 'copyright', 'info', 'summary', 'content')) && isset($attribs['']['mode']) && $attribs['']['mode'] === 'xml')
321:             || (end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_10 && in_array(end($this->element), array('rights', 'subtitle', 'summary', 'info', 'title', 'content')) && isset($attribs['']['type']) && $attribs['']['type'] === 'xhtml')
322:             || (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_20 && in_array(end($this->element), array('title')))
323:             || (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_090 && in_array(end($this->element), array('title')))
324:             || (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_10 && in_array(end($this->element), array('title'))))
325:             {
326:                 $this->current_xhtml_construct = 0;
327:             }
328:         }
329:     }
330: 
331:     public function cdata($parser, $cdata)
332:     {
333:         if ($this->current_xhtml_construct >= 0)
334:         {
335:             $this->data['data'] .= htmlspecialchars($cdata, ENT_QUOTES, $this->encoding);
336:         }
337:         else
338:         {
339:             $this->data['data'] .= $cdata;
340:         }
341:     }
342: 
343:     public function tag_close($parser, $tag)
344:     {
345:         if ($this->current_xhtml_construct >= 0)
346:         {
347:             $this->current_xhtml_construct--;
348:             if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML && !in_array(end($this->element), array('area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param')))
349:             {
350:                 $this->data['data'] .= '</' . end($this->element) . '>';
351:             }
352:         }
353:         if ($this->current_xhtml_construct === -1)
354:         {
355:             $this->data =& $this->datas[count($this->datas) - 1];
356:             array_pop($this->datas);
357:         }
358: 
359:         array_pop($this->element);
360:         array_pop($this->namespace);
361:         array_pop($this->xml_base);
362:         array_pop($this->xml_base_explicit);
363:         array_pop($this->xml_lang);
364:     }
365: 
366:     public function split_ns($string)
367:     {
368:         static $cache = array();
369:         if (!isset($cache[$string]))
370:         {
371:             if ($pos = strpos($string, $this->separator))
372:             {
373:                 static $separator_length;
374:                 if (!$separator_length)
375:                 {
376:                     $separator_length = strlen($this->separator);
377:                 }
378:                 $namespace = substr($string, 0, $pos);
379:                 $local_name = substr($string, $pos + $separator_length);
380:                 if (strtolower($namespace) === SIMPLEPIE_NAMESPACE_ITUNES)
381:                 {
382:                     $namespace = SIMPLEPIE_NAMESPACE_ITUNES;
383:                 }
384: 
385:                 // Normalize the Media RSS namespaces
386:                 if ($namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG ||
387:                     $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG2 ||
388:                     $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG3 ||
389:                     $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG4 ||
390:                     $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG5 )
391:                 {
392:                     $namespace = SIMPLEPIE_NAMESPACE_MEDIARSS;
393:                 }
394:                 $cache[$string] = array($namespace, $local_name);
395:             }
396:             else
397:             {
398:                 $cache[$string] = array('', $string);
399:             }
400:         }
401:         return $cache[$string];
402:     }
403: }
404: 

Show some love! Wishlists for Geoffrey, Ryan P., and Ryan M.

SimplePie is © 2004–2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue and contributors. Licensed under the BSD License. Hosted thanks to Matt Mullenweg. API documentation generated by ApiGen 2.6.1. Variation on the Feed Icon by Wolfgang Bartelme.