1: <?php
2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43:
44:
45: 46: 47: 48: 49: 50: 51: 52: 53:
/**
 * Cleans up content taken from a feed before it is handed to the caller.
 *
 * Depending on the construct type passed to sanitize(), the data is
 * base64-decoded, parsed as HTML so that configured tags, attributes and
 * comments can be stripped (or encoded), has its URL attributes made
 * absolute, optionally has its images routed through an image handler
 * backed by the cache, is HTML-escaped when it is plain text or an IRI,
 * and is finally converted to the configured output encoding.
 */
class SimplePie_Sanitize
{
    /** @var string|null Base URL used by replace_urls(); assigned by sanitize() */
    public $base;

    /** @var bool Whether sanitize() removes the outer wrapping <div> */
    public $remove_div = true;

    /** @var string|false Prefix prepended to the cache name of each image; empty/false disables the handler */
    public $image_handler = '';

    /** @var array|false Tag names removed (or encoded) by sanitize(); false disables tag stripping */
    public $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');

    /** @var bool When true, stripped tags are turned into visible text instead of being dropped */
    public $encode_instead_of_strip = false;

    /** @var array|false Attribute names removed by sanitize(); false disables attribute stripping */
    public $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');

    /** @var bool Whether sanitize() removes comment nodes */
    public $strip_comments = false;

    /** @var string Encoding of the data returned by sanitize() */
    public $output_encoding = 'UTF-8';

    /** @var bool Image handling in sanitize() only runs while this is true */
    public $enable_cache = true;

    /** @var string Location passed to the cache factory for image cache entries */
    public $cache_location = './cache';

    /** @var callable Function that turns an image URL into a cache name */
    public $cache_name_function = 'md5';

    /** @var int|string Timeout handed to the File object when fetching images */
    public $timeout = 10;

    /** @var string User agent handed to the File object when fetching images */
    public $useragent = '';

    /** @var bool|string Flag handed to the File object when fetching images */
    public $force_fsockopen = false;

    /** @var array|null Map of element name => attribute name(s) whose URLs are absolutized */
    public $replace_url_attributes = null;

    /**
     * Registry used to reach Misc, Cache and File.
     *
     * Declared explicitly because set_registry() assigns it and several
     * methods read it; leaving it undeclared makes it a dynamic property.
     *
     * @var SimplePie_Registry|null
     */
    public $registry;

    /**
     * Installs the default element/attribute map for URL replacement.
     */
    public function __construct()
    {
        $this->set_url_replacements(null);
    }

    /**
     * Chooses whether the outer wrapping <div> is removed from sanitized HTML.
     *
     * @param bool $enable
     */
    public function remove_div($enable = true)
    {
        $this->remove_div = (bool) $enable;
    }

    /**
     * Sets the image handler prefix, or disables the handler.
     *
     * @param string|false $page Prefix placed in front of the image cache name; any falsy value stores false
     */
    public function set_image_handler($page = false)
    {
        if ($page)
        {
            $this->image_handler = (string) $page;
        }
        else
        {
            $this->image_handler = false;
        }
    }

    /**
     * Stores the registry used for Misc, Cache and File lookups.
     *
     * @param SimplePie_Registry $registry
     */
    public function set_registry(SimplePie_Registry $registry)
    {
        $this->registry = $registry;
    }

    /**
     * Copies cache settings into this object.
     *
     * Falsy values for the location and the name function leave the current
     * setting untouched. $cache_class is accepted but not used here.
     *
     * @param bool $enable_cache
     * @param string $cache_location
     * @param callable $cache_name_function
     * @param string $cache_class Unused
     */
    public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
    {
        if (isset($enable_cache))
        {
            $this->enable_cache = (bool) $enable_cache;
        }

        if ($cache_location)
        {
            $this->cache_location = (string) $cache_location;
        }

        if ($cache_name_function)
        {
            $this->cache_name_function = (string) $cache_name_function;
        }
    }

    /**
     * Copies file-fetching settings into this object.
     *
     * Falsy values leave the current setting untouched; accepted values are
     * stored after a string cast. $file_class is accepted but not used here.
     *
     * @param string $file_class Unused
     * @param int $timeout
     * @param string $useragent
     * @param bool $force_fsockopen
     */
    public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
    {
        if ($timeout)
        {
            $this->timeout = (string) $timeout;
        }

        if ($useragent)
        {
            $this->useragent = (string) $useragent;
        }

        if ($force_fsockopen)
        {
            $this->force_fsockopen = (string) $force_fsockopen;
        }
    }

    /**
     * Sets the list of tags to strip.
     *
     * @param array|string|false $tags Array of tag names, a comma-separated string, or a falsy value to disable stripping
     */
    public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'))
    {
        if (!$tags)
        {
            $this->strip_htmltags = false;
            return;
        }

        $this->strip_htmltags = is_array($tags) ? $tags : explode(',', $tags);
    }

    /**
     * Chooses between encoding stripped tags as text and dropping them.
     *
     * @param bool $encode
     */
    public function encode_instead_of_strip($encode = false)
    {
        $this->encode_instead_of_strip = (bool) $encode;
    }

    /**
     * Sets the list of attributes to strip.
     *
     * @param array|string|false $attribs Array of attribute names, a comma-separated string, or a falsy value to disable stripping
     */
    public function strip_attributes($attribs = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
    {
        if (!$attribs)
        {
            $this->strip_attributes = false;
            return;
        }

        $this->strip_attributes = is_array($attribs) ? $attribs : explode(',', $attribs);
    }

    /**
     * Chooses whether comment nodes are removed from sanitized HTML.
     *
     * @param bool $strip
     */
    public function strip_comments($strip = false)
    {
        $this->strip_comments = (bool) $strip;
    }

    /**
     * Sets the encoding of the data returned by sanitize().
     *
     * @param string $encoding
     */
    public function set_output_encoding($encoding = 'UTF-8')
    {
        $this->output_encoding = (string) $encoding;
    }

    /**
     * Sets which element attributes hold URLs that sanitize() makes absolute.
     *
     * @param array|null $element_attribute Map of element name => attribute
     *                                      name or list of attribute names;
     *                                      null installs the built-in map
     */
    public function set_url_replacements($element_attribute = null)
    {
        if ($element_attribute === null)
        {
            $element_attribute = array(
                'a' => 'href',
                'area' => 'href',
                'blockquote' => 'cite',
                'del' => 'cite',
                'form' => 'action',
                'img' => array(
                    'longdesc',
                    'src'
                ),
                'input' => 'src',
                'ins' => 'cite',
                'q' => 'cite'
            );
        }
        $this->replace_url_attributes = (array) $element_attribute;
    }

    /**
     * Sanitizes one piece of feed data according to its construct type.
     *
     * @param string $data Raw data
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     * @param string $base Base URL used when absolutizing URLs
     * @return string Sanitized data in the configured output encoding
     */
    public function sanitize($data, $type, $base = '')
    {
        $data = trim($data);

        // Empty data is returned untouched unless it is an IRI, which still
        // goes through URL absolutization below.
        if ($data === '' && !($type & SIMPLEPIE_CONSTRUCT_IRI))
        {
            return $data;
        }

        // "Maybe HTML" is resolved to HTML when the data contains an entity
        // or a closing tag, and to plain text otherwise.
        if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML)
        {
            if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data))
            {
                $type |= SIMPLEPIE_CONSTRUCT_HTML;
            }
            else
            {
                $type |= SIMPLEPIE_CONSTRUCT_TEXT;
            }
        }

        if ($type & SIMPLEPIE_CONSTRUCT_BASE64)
        {
            $data = base64_decode($data);
        }

        if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
        {
            $document = new DOMDocument();
            $document->encoding = 'UTF-8';
            $data = $this->preprocess($data, $type);

            // Parser diagnostics for malformed markup are silenced.
            set_error_handler(array('SimplePie_Misc', 'silence_errors'));
            $document->loadHTML($data);
            restore_error_handler();

            if ($this->strip_comments)
            {
                $xpath = new DOMXPath($document);
                $comments = $xpath->query('//comment()');

                foreach ($comments as $comment)
                {
                    $comment->parentNode->removeChild($comment);
                }
            }

            if ($this->strip_htmltags)
            {
                foreach ($this->strip_htmltags as $tag)
                {
                    $this->strip_tag($tag, $document, $type);
                }
            }

            if ($this->strip_attributes)
            {
                foreach ($this->strip_attributes as $attrib)
                {
                    $this->strip_attr($attrib, $document);
                }
            }

            // replace_urls() reads the base URL from the object.
            $this->base = $base;
            foreach ($this->replace_url_attributes as $element => $attributes)
            {
                $this->replace_urls($document, $element, $attributes);
            }

            // Point every image at the image handler, caching the image
            // body first when it is not in the cache yet.
            if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
            {
                $images = $document->getElementsByTagName('img');
                foreach ($images as $img)
                {
                    if (!$img->hasAttribute('src'))
                    {
                        continue;
                    }

                    $src = $img->getAttribute('src');
                    $image_url = call_user_func($this->cache_name_function, $src);
                    $cache = $this->registry->call('Cache', 'create', array($this->cache_location, $image_url, 'spi'));

                    if ($cache->load())
                    {
                        $img->setAttribute('src', $this->image_handler . $image_url);
                        continue;
                    }

                    // REMOTE_ADDR is not defined outside a web request, so
                    // the forwarding header is only sent when it exists.
                    $request_headers = array();
                    if (isset($_SERVER['REMOTE_ADDR']))
                    {
                        $request_headers['X-FORWARDED-FOR'] = $_SERVER['REMOTE_ADDR'];
                    }

                    // $img is a DOMElement, so its URL is read with
                    // getAttribute() rather than array access.
                    $file = $this->registry->create('File', array($src, $this->timeout, 5, $request_headers, $this->useragent, $this->force_fsockopen));

                    // The bit test is parenthesised because === binds
                    // tighter than &. Accepted: non-remote sources, or
                    // status 200, or a status strictly between 206 and 300.
                    $is_local = ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) === 0;
                    $status_ok = $file->status_code === 200 || ($file->status_code > 206 && $file->status_code < 300);

                    if ($file->success && ($is_local || $status_ok))
                    {
                        if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
                        {
                            $img->setAttribute('src', $this->image_handler . $image_url);
                        }
                        else
                        {
                            trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
                        }
                    }
                }
            }

            // Drop the doctype that preprocess() added.
            if ($document->firstChild instanceof DOMDocumentType)
            {
                $document->removeChild($document->firstChild);
            }

            // Replace the <html> scaffolding with the first child of <body>
            // so that only the content itself is serialized. When the body
            // is missing or empty there is nothing to serialize.
            $body = $document->getElementsByTagName('body')->item(0);
            $real_body = ($body !== null) ? $body->childNodes->item(0) : null;

            if ($real_body !== null && $document->firstChild !== null)
            {
                $document->replaceChild($real_body, $document->firstChild);
                $data = trim($document->saveHTML());
            }
            else
            {
                $data = '';
            }

            if ($this->remove_div)
            {
                $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
                $data = preg_replace('/<\/div>$/', '', $data);
            }
            else
            {
                // Keep the wrapper but drop whatever attributes it carries.
                $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
            }
        }

        if ($type & SIMPLEPIE_CONSTRUCT_IRI)
        {
            $data = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
        }

        if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
        {
            $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
        }

        if ($this->output_encoding !== 'UTF-8')
        {
            $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
        }

        return $data;
    }

    /**
     * Wraps a fragment in a complete document so the HTML parser can load it.
     *
     * When $type has any bit set other than SIMPLEPIE_CONSTRUCT_XHTML the
     * fragment is wrapped in a <div> and given an HTML doctype; otherwise
     * it is left unwrapped and given an XHTML 1.0 Strict doctype. A
     * Content-Type meta tag declaring utf-8 is always included.
     *
     * @param string $html Fragment to wrap
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     * @return string Complete document
     */
    protected function preprocess($html, $type)
    {
        if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
        {
            $html = '<div>' . $html . '</div>';
            $doctype = '<!DOCTYPE html>';
            $content_type = 'text/html';
        }
        else
        {
            $doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
            $content_type = 'application/xhtml+xml';
        }

        return $doctype
            . '<html><head>'
            . '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />'
            . '</head><body>' . $html . '</body></html>';
    }

    /**
     * Makes the given attributes of every $tag element absolute URLs.
     *
     * Elements whose tag is in the strip list are skipped. URLs are
     * resolved against $this->base.
     *
     * @param DOMDocument $document
     * @param string $tag Element name
     * @param string|array $attributes Attribute name or list of attribute names
     */
    public function replace_urls($document, $tag, $attributes)
    {
        if (!is_array($attributes))
        {
            $attributes = array($attributes);
        }

        if (is_array($this->strip_htmltags) && in_array($tag, $this->strip_htmltags, true))
        {
            return;
        }

        $elements = $document->getElementsByTagName($tag);
        foreach ($elements as $element)
        {
            foreach ($attributes as $attribute)
            {
                if ($element->hasAttribute($attribute))
                {
                    $value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base));
                    $element->setAttribute($attribute, $value);
                }
            }
        }
    }

    /**
     * Produces the replacement for one regex match of a tag to strip.
     *
     * Nothing in this class calls it. The indices read are: 0 = whole
     * match, 1 = tag name, 2 = attribute text, 3 and 4 = element content
     * (4 is only set for tags that have content).
     *
     * @param array $match
     * @return string
     */
    public function do_strip_htmltags($match)
    {
        $keeps_content = isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style'), true);

        if ($this->encode_instead_of_strip)
        {
            if ($keeps_content)
            {
                $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
                $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
                return "<$match[1]$match[2]>$match[3]</$match[1]>";
            }

            return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
        }

        return $keeps_content ? $match[4] : '';
    }

    /**
     * Removes every $tag element found under <body>.
     *
     * - With encode_instead_of_strip, the element is replaced by its
     *   children surrounded by text nodes spelling out the opening and
     *   closing tag (script/style get no such text nodes).
     * - Otherwise script/style elements are removed with their content,
     *   and any other element is replaced by its children.
     *
     * @param string $tag Element name
     * @param DOMDocument $document
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     */
    protected function strip_tag($tag, $document, $type)
    {
        $xpath = new DOMXPath($document);
        $elements = $xpath->query('body//' . $tag);

        // query() returns false for an expression it cannot evaluate.
        if ($elements === false)
        {
            return;
        }

        $is_script_or_style = in_array($tag, array('script', 'style'), true);

        if ($this->encode_instead_of_strip)
        {
            foreach ($elements as $element)
            {
                $fragment = $document->createDocumentFragment();

                if (!$is_script_or_style)
                {
                    $text = '<' . $tag;
                    if ($element->hasAttributes())
                    {
                        $attrs = array();
                        foreach ($element->attributes as $name => $attr)
                        {
                            $value = $attr->value;

                            // Compared with '' rather than empty() so that
                            // a value of "0" is kept.
                            if ($value === '' && ($type & SIMPLEPIE_CONSTRUCT_XHTML))
                            {
                                // Valueless attribute in XHTML: name="name"
                                $value = $name;
                            }
                            elseif ($value === '' && ($type & SIMPLEPIE_CONSTRUCT_HTML))
                            {
                                // Valueless attribute in HTML: bare name
                                $attrs[] = $name;
                                continue;
                            }

                            $attrs[] = $name . '="' . $value . '"';
                        }
                        $text .= ' ' . implode(' ', $attrs);
                    }
                    $text .= '>';
                    $fragment->appendChild($document->createTextNode($text));
                }

                // Appending a child to the fragment detaches it from the
                // element, so item(0) is always the next child to move.
                $number = $element->childNodes->length;
                for ($i = $number; $i > 0; $i--)
                {
                    $fragment->appendChild($element->childNodes->item(0));
                }

                if (!$is_script_or_style)
                {
                    $fragment->appendChild($document->createTextNode('</' . $tag . '>'));
                }

                $element->parentNode->replaceChild($fragment, $element);
            }

            return;
        }

        if ($is_script_or_style)
        {
            foreach ($elements as $element)
            {
                $element->parentNode->removeChild($element);
            }

            return;
        }

        foreach ($elements as $element)
        {
            $fragment = $document->createDocumentFragment();
            $number = $element->childNodes->length;
            for ($i = $number; $i > 0; $i--)
            {
                $fragment->appendChild($element->childNodes->item(0));
            }

            $element->parentNode->replaceChild($fragment, $element);
        }
    }

    /**
     * Removes the $attrib attribute from every element that carries it.
     *
     * @param string $attrib Attribute name
     * @param DOMDocument $document
     */
    protected function strip_attr($attrib, $document)
    {
        $xpath = new DOMXPath($document);
        $elements = $xpath->query('//*[@' . $attrib . ']');

        // query() returns false for an expression it cannot evaluate.
        if ($elements === false)
        {
            return;
        }

        foreach ($elements as $element)
        {
            $element->removeAttribute($attrib);
        }
    }
}
543: