1: <?php
2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43:
44:
45: 46: 47: 48: 49: 50: 51: 52: 53:
/**
 * Event-driven XML parser that converts a document into a nested array tree.
 *
 * parse() feeds the document either to the ext/xml parser or, when that
 * extension fails a built-in sanity probe, to XMLReader. Both paths report
 * elements and text through tag_open(), cdata() and tag_close(), which build
 * the tree returned by get_data().
 *
 * Each node of the tree is an array with the keys 'data', 'attribs',
 * 'xml_base', 'xml_base_explicit' and 'xml_lang'; child nodes are stored under
 * ['child'][namespace][local name][index].
 */
class SimplePie_Parser
{
    /** Error code of the last failed parse (ext/xml or libxml error code). */
    public $error_code;

    /** Human-readable message of the last failed parse. */
    public $error_string;

    /** Line number reported by the underlying parser. */
    public $current_line;

    /** Column number reported by the underlying parser. */
    public $current_column;

    /** Byte index reported by the ext/xml parser (not set by the XMLReader path). */
    public $current_byte;

    /** Separator placed between namespace URI and local name in qualified names. */
    public $separator = ' ';

    /** Stack of namespace URIs of the currently open elements. */
    public $namespace = array('');

    /** Stack of local names of the currently open elements. */
    public $element = array('');

    /** Stack of xml:base values in effect for the currently open elements. */
    public $xml_base = array('');

    /** Stack of flags telling whether an xml:base was set explicitly on the element or an ancestor. */
    public $xml_base_explicit = array(false);

    /** Stack of xml:lang values in effect for the currently open elements. */
    public $xml_lang = array('');

    /** Reference to the node currently being filled; the root container once parsing is done. */
    public $data = array();

    /** Stack of references to the ancestors of the node in $data. */
    public $datas = array(array());

    /**
     * -1 while outside an inline-markup construct; otherwise the number of
     * elements currently open inside the construct (0 = directly inside it).
     */
    public $current_xhtml_construct = -1;

    /** Encoding handed to the XML parser and to htmlspecialchars(). */
    public $encoding;

    /** Registry used to create the XML declaration parser and to call Misc helpers. */
    protected $registry;

    /**
     * Inject the registry used to resolve collaborating classes.
     *
     * @param SimplePie_Registry $registry
     */
    public function set_registry(SimplePie_Registry $registry)
    {
        $this->registry = $registry;
    }

    /**
     * Parse an XML document and build the data tree.
     *
     * The document is modified in place: a leading byte order mark is removed
     * and an XML declaration, if present, is rewritten to carry $encoding.
     *
     * @param string $data     Document source (passed by reference, modified)
     * @param string $encoding Character encoding of $data
     * @return bool True on success; false on failure, in which case the error
     *              getters describe the problem
     */
    public function parse(&$data, $encoding)
    {
        // Every US-ASCII document is also valid UTF-8, so parse it as UTF-8.
        if (strtoupper($encoding) === 'US-ASCII')
        {
            $this->encoding = 'UTF-8';
        }
        else
        {
            $this->encoding = $encoding;
        }

        // Strip a leading byte order mark. The four-byte marks must be tested
        // first because the UTF-32LE mark starts with the UTF-16LE one.
        static $byte_order_marks = array(
            "\x00\x00\xFE\xFF", // UTF-32 big endian
            "\xFF\xFE\x00\x00", // UTF-32 little endian
            "\xFE\xFF",         // UTF-16 big endian
            "\xFF\xFE",         // UTF-16 little endian
            "\xEF\xBB\xBF",     // UTF-8
        );
        foreach ($byte_order_marks as $bom)
        {
            $bom_length = strlen($bom);
            if (strncmp($data, $bom, $bom_length) === 0)
            {
                $data = substr($data, $bom_length);
                break;
            }
        }

        // Replace an existing XML declaration with one that names $encoding.
        if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false)
        {
            $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
            if (!$declaration->parse())
            {
                $this->error_string = 'SimplePie bug! Please report this!';
                return false;
            }
            $standalone = ($declaration->standalone) ? 'yes' : 'no';
            $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . $standalone . '"?>' . substr($data, $pos + 2);
        }

        // Probe once per process whether ext/xml delivers text that comes from
        // an entity reference. The probe document must be well-formed
        // ("&amp;", not a bare "&"), otherwise the check can never succeed.
        // NOTE(review): presumably this guards against libxml2 builds that
        // dropped entity text -- confirm against upstream history.
        static $xml_is_sane = null;
        if ($xml_is_sane === null)
        {
            $parser_check = xml_parser_create();
            xml_parse_into_struct($parser_check, '<foo>&amp;</foo>', $values);
            if (version_compare(PHP_VERSION, '8.0.0', '<'))
            {
                // Only needed for resource-based parsers; a no-op from PHP 8.0.
                xml_parser_free($parser_check);
            }
            $xml_is_sane = isset($values[0]['value']);
        }

        if ($xml_is_sane)
        {
            return $this->parse_with_xml_extension($data);
        }

        return $this->parse_with_xmlreader($data);
    }

    /**
     * Run the document through the ext/xml namespace-aware parser.
     *
     * @param string $data Document source
     * @return bool True when the document parsed without error
     */
    private function parse_with_xml_extension($data)
    {
        $xml = xml_parser_create_ns($this->encoding, $this->separator);
        xml_parser_set_option($xml, XML_OPTION_SKIP_WHITE, 1);
        xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, 0);
        // Array callables replace xml_set_object() plus string method names,
        // a combination that is deprecated as of PHP 8.4.
        xml_set_character_data_handler($xml, array($this, 'cdata'));
        xml_set_element_handler($xml, array($this, 'tag_open'), array($this, 'tag_close'));

        $succeeded = true;
        if (!xml_parse($xml, $data, true))
        {
            $this->error_code = xml_get_error_code($xml);
            $this->error_string = xml_error_string($this->error_code);
            $succeeded = false;
        }

        // The position is recorded whether or not parsing succeeded.
        $this->current_line = xml_get_current_line_number($xml);
        $this->current_column = xml_get_current_column_number($xml);
        $this->current_byte = xml_get_current_byte_index($xml);

        if (version_compare(PHP_VERSION, '8.0.0', '<'))
        {
            xml_parser_free($xml);
        }

        return $succeeded;
    }

    /**
     * Run the document through XMLReader, emitting the same events the
     * ext/xml handlers would receive.
     *
     * @param string $data Document source
     * @return bool True when libxml reported no error
     */
    private function parse_with_xmlreader($data)
    {
        libxml_clear_errors();
        $xml = new XMLReader();
        $xml->xml($data);

        // Warnings from read() are silenced; failures are picked up from
        // libxml_get_last_error() after the loop.
        while (@$xml->read())
        {
            switch ($xml->nodeType)
            {
                case XMLReader::END_ELEMENT:
                    $this->tag_close(null, $this->reader_qualified_name($xml));
                    break;

                case XMLReader::ELEMENT:
                    // Read these before moving the cursor onto the attributes.
                    $empty = $xml->isEmptyElement;
                    $tagName = $this->reader_qualified_name($xml);

                    $attributes = array();
                    while ($xml->moveToNextAttribute())
                    {
                        $attributes[$this->reader_qualified_name($xml)] = $xml->value;
                    }

                    $this->tag_open(null, $tagName, $attributes);
                    if ($empty)
                    {
                        // Self-closing elements produce no END_ELEMENT node.
                        $this->tag_close(null, $tagName);
                    }
                    break;

                case XMLReader::TEXT:
                case XMLReader::CDATA:
                    $this->cdata(null, $xml->value);
                    break;
            }
        }

        $error = libxml_get_last_error();
        if ($error)
        {
            $this->error_code = $error->code;
            $this->error_string = $error->message;
            $this->current_line = $error->line;
            $this->current_column = $error->column;
            return false;
        }

        return true;
    }

    /**
     * Build the "namespace<separator>local name" form of the node the reader
     * is positioned on, or just the local name when it has no namespace.
     *
     * @param XMLReader $reader
     * @return string
     */
    private function reader_qualified_name($reader)
    {
        if ($reader->namespaceURI !== '')
        {
            return $reader->namespaceURI . $this->separator . $reader->localName;
        }

        return $reader->localName;
    }

    /**
     * @return int|null Error code of the last failed parse
     */
    public function get_error_code()
    {
        return $this->error_code;
    }

    /**
     * @return string|null Error message of the last failed parse
     */
    public function get_error_string()
    {
        return $this->error_string;
    }

    /**
     * @return int|null Line number recorded by the last parse
     */
    public function get_current_line()
    {
        return $this->current_line;
    }

    /**
     * @return int|null Column number recorded by the last parse
     */
    public function get_current_column()
    {
        return $this->current_column;
    }

    /**
     * @return int|null Byte index recorded by the last ext/xml parse
     */
    public function get_current_byte()
    {
        return $this->current_byte;
    }

    /**
     * @return array The tree built so far
     */
    public function get_data()
    {
        return $this->data;
    }

    /**
     * Start-element handler.
     *
     * Pushes the element onto the context stacks and then either appends its
     * markup to the enclosing inline-markup construct or opens a new node in
     * the data tree.
     *
     * @param mixed  $parser     Parser handle supplied by ext/xml (unused; null from XMLReader)
     * @param string $tag        Qualified element name as produced with $separator
     * @param array  $attributes Qualified attribute name => value
     */
    public function tag_open($parser, $tag, $attributes)
    {
        list($namespace, $element) = $this->split_ns($tag);
        $this->namespace[] = $namespace;
        $this->element[] = $element;

        // Group the attributes as [namespace][local name] => value.
        $attribs = array();
        foreach ($attributes as $name => $value)
        {
            list($attrib_namespace, $attribute) = $this->split_ns($name);
            $attribs[$attrib_namespace][$attribute] = $value;
        }

        // xml:base is resolved against the inherited base; without one the
        // parent's values are inherited unchanged.
        $parent_base = end($this->xml_base);
        if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['base']))
        {
            $base = $this->registry->call('Misc', 'absolutize_url', array($attribs[SIMPLEPIE_NAMESPACE_XML]['base'], $parent_base));
            $base_explicit = true;
        }
        else
        {
            $base = $parent_base;
            $base_explicit = end($this->xml_base_explicit);
        }
        $this->xml_base[] = $base;
        $this->xml_base_explicit[] = $base_explicit;

        if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['lang']))
        {
            $lang = $attribs[SIMPLEPIE_NAMESPACE_XML]['lang'];
        }
        else
        {
            $lang = end($this->xml_lang);
        }
        $this->xml_lang[] = $lang;

        if ($this->current_xhtml_construct >= 0)
        {
            // Inside an inline-markup construct: serialise XHTML elements into
            // the construct's text instead of creating tree nodes.
            $this->current_xhtml_construct++;
            if ($namespace === SIMPLEPIE_NAMESPACE_XHTML)
            {
                $this->data['data'] .= '<' . $element;
                if (isset($attribs['']))
                {
                    foreach ($attribs[''] as $name => $value)
                    {
                        $this->data['data'] .= ' ' . $name . '="' . htmlspecialchars($value, ENT_COMPAT, $this->encoding) . '"';
                    }
                }
                $this->data['data'] .= '>';
            }
            return;
        }

        // Remember the parent, then make $this->data refer to a fresh child slot.
        $this->datas[] =& $this->data;
        $this->data =& $this->data['child'][$namespace][$element][];
        $this->data = array(
            'data' => '',
            'attribs' => $attribs,
            'xml_base' => $base,
            'xml_base_explicit' => $base_explicit,
            'xml_lang' => $lang,
        );

        $starts_inline_markup =
            ($namespace === SIMPLEPIE_NAMESPACE_ATOM_03
                && in_array($element, array('title', 'tagline', 'copyright', 'info', 'summary', 'content'), true)
                && isset($attribs['']['mode']) && $attribs['']['mode'] === 'xml')
            || ($namespace === SIMPLEPIE_NAMESPACE_ATOM_10
                && in_array($element, array('rights', 'subtitle', 'summary', 'info', 'title', 'content'), true)
                && isset($attribs['']['type']) && $attribs['']['type'] === 'xhtml')
            || ($namespace === SIMPLEPIE_NAMESPACE_RSS_20 && $element === 'title')
            || ($namespace === SIMPLEPIE_NAMESPACE_RSS_090 && $element === 'title')
            || ($namespace === SIMPLEPIE_NAMESPACE_RSS_10 && $element === 'title');

        if ($starts_inline_markup)
        {
            $this->current_xhtml_construct = 0;
        }
    }

    /**
     * Character-data handler: appends text to the current node, escaping it
     * when the node is an inline-markup construct.
     *
     * @param mixed  $parser Parser handle supplied by ext/xml (unused; null from XMLReader)
     * @param string $cdata  Text chunk
     */
    public function cdata($parser, $cdata)
    {
        if ($this->current_xhtml_construct >= 0)
        {
            $cdata = htmlspecialchars($cdata, ENT_QUOTES, $this->encoding);
        }

        $this->data['data'] .= $cdata;
    }

    /**
     * End-element handler: closes serialised XHTML markup or returns to the
     * parent node, then pops the context stacks.
     *
     * @param mixed  $parser Parser handle supplied by ext/xml (unused; null from XMLReader)
     * @param string $tag    Qualified element name (unused; the stacks identify the element)
     */
    public function tag_close($parser, $tag)
    {
        if ($this->current_xhtml_construct >= 0)
        {
            $this->current_xhtml_construct--;

            // Void HTML elements get no closing tag.
            $void_elements = array('area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param');
            if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML && !in_array(end($this->element), $void_elements, true))
            {
                $this->data['data'] .= '</' . end($this->element) . '>';
            }
        }

        // -1 means either an ordinary element or the end of the construct
        // itself: step back to the parent node.
        if ($this->current_xhtml_construct === -1)
        {
            $this->data =& $this->datas[count($this->datas) - 1];
            array_pop($this->datas);
        }

        array_pop($this->element);
        array_pop($this->namespace);
        array_pop($this->xml_base);
        array_pop($this->xml_base_explicit);
        array_pop($this->xml_lang);
    }

    /**
     * Split a qualified name into namespace URI and local name.
     *
     * The iTunes namespace is matched case-insensitively and the known
     * misspelt Media RSS namespaces are mapped to the canonical one. Results
     * are memoised per separator, so parsers configured with different
     * separators cannot see each other's entries.
     *
     * @param string $string Qualified name ("namespace<separator>local" or just "local")
     * @return array Two-element list: namespace URI ('' when none), local name
     */
    public function split_ns($string)
    {
        static $cache = array();

        $separator = $this->separator;
        if (isset($cache[$separator][$string]))
        {
            return $cache[$separator][$string];
        }

        $pos = strpos($string, $separator);
        if ($pos === false || $pos === 0)
        {
            // No separator (or nothing in front of it): the name has no namespace.
            $result = array('', $string);
        }
        else
        {
            $namespace = substr($string, 0, $pos);
            $local_name = substr($string, $pos + strlen($separator));

            if (strtolower($namespace) === SIMPLEPIE_NAMESPACE_ITUNES)
            {
                $namespace = SIMPLEPIE_NAMESPACE_ITUNES;
            }

            // Normalise the commonly seen wrong Media RSS namespace URIs.
            if ($namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG ||
                $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG2 ||
                $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG3 ||
                $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG4 ||
                $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG5)
            {
                $namespace = SIMPLEPIE_NAMESPACE_MEDIARSS;
            }

            $result = array($namespace, $local_name);
        }

        $cache[$separator][$string] = $result;
        return $result;
    }
}
404: