SimplePie: PHP-based RSS and Atom feed handling
 
  • Overview
  • Demo
  • Blog
  • Download
  • Documentation
  • API Docs
  • Support
  • Issue Tracker
  • FAQ
  • Overview
  • Package
  • Class
  • Tree
  • Deprecated

Packages

  • SimplePie
    • API
    • Caching
    • HTTP
    • Parsing

Classes

  • SimplePie_Content_Type_Sniffer
  • SimplePie_File
  • SimplePie_gzdecode
  • SimplePie_HTTP_Parser
  • SimplePie_IRI
  • SimplePie_Net_IPv6
  1: <?php
  2: /**
  3:  * SimplePie
  4:  *
  5:  * A PHP-Based RSS and Atom Feed Framework.
  6:  * Takes the hard work out of managing a complete RSS/Atom solution.
  7:  *
  8:  * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
  9:  * All rights reserved.
 10:  *
 11:  * Redistribution and use in source and binary forms, with or without modification, are
 12:  * permitted provided that the following conditions are met:
 13:  *
 14:  *  * Redistributions of source code must retain the above copyright notice, this list of
 15:  *    conditions and the following disclaimer.
 16:  *
 17:  *  * Redistributions in binary form must reproduce the above copyright notice, this list
 18:  *    of conditions and the following disclaimer in the documentation and/or other materials
 19:  *    provided with the distribution.
 20:  *
 21:  *  * Neither the name of the SimplePie Team nor the names of its contributors may be used
 22:  *    to endorse or promote products derived from this software without specific prior
 23:  *    written permission.
 24:  *
 25:  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 26:  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 27:  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
 28:  * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 29:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 30:  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 31:  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 32:  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 33:  * POSSIBILITY OF SUCH DAMAGE.
 34:  *
 35:  * @package SimplePie
 36:  * @version 1.3
 37:  * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
 38:  * @author Ryan Parman
 39:  * @author Geoffrey Sneddon
 40:  * @author Ryan McCue
 41:  * @link http://simplepie.org/ SimplePie
 42:  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
 43:  */
 44: 
 45: 
 46: /**
 47:  * Content-type sniffing
 48:  *
 49:  * Based on the rules in http://tools.ietf.org/html/draft-abarth-mime-sniff-06
 50:  *
 51:  * This is used since we can't always trust Content-Type headers, and is based
 52:  * upon the HTML5 parsing rules.
 53:  *
 54:  *
 55:  * This class can be overloaded with {@see SimplePie::set_content_type_sniffer_class()}
 56:  *
 57:  * @package SimplePie
 58:  * @subpackage HTTP
 59:  */
 60: class SimplePie_Content_Type_Sniffer
 61: {
 62:     /**
 63:      * File object
 64:      *
 65:      * @var SimplePie_File
 66:      */
 67:     var $file;
 68: 
 69:     /**
 70:      * Create an instance of the class with the input file
 71:      *
 72:      * @param SimplePie_Content_Type_Sniffer $file Input file
 73:      */
 74:     public function __construct($file)
 75:     {
 76:         $this->file = $file;
 77:     }
 78: 
 79:     /**
 80:      * Get the Content-Type of the specified file
 81:      *
 82:      * @return string Actual Content-Type
 83:      */
 84:     public function get_type()
 85:     {
 86:         if (isset($this->file->headers['content-type']))
 87:         {
 88:             if (!isset($this->file->headers['content-encoding'])
 89:                 && ($this->file->headers['content-type'] === 'text/plain'
 90:                     || $this->file->headers['content-type'] === 'text/plain; charset=ISO-8859-1'
 91:                     || $this->file->headers['content-type'] === 'text/plain; charset=iso-8859-1'
 92:                     || $this->file->headers['content-type'] === 'text/plain; charset=UTF-8'))
 93:             {
 94:                 return $this->text_or_binary();
 95:             }
 96: 
 97:             if (($pos = strpos($this->file->headers['content-type'], ';')) !== false)
 98:             {
 99:                 $official = substr($this->file->headers['content-type'], 0, $pos);
100:             }
101:             else
102:             {
103:                 $official = $this->file->headers['content-type'];
104:             }
105:             $official = trim(strtolower($official));
106: 
107:             if ($official === 'unknown/unknown'
108:                 || $official === 'application/unknown')
109:             {
110:                 return $this->unknown();
111:             }
112:             elseif (substr($official, -4) === '+xml'
113:                 || $official === 'text/xml'
114:                 || $official === 'application/xml')
115:             {
116:                 return $official;
117:             }
118:             elseif (substr($official, 0, 6) === 'image/')
119:             {
120:                 if ($return = $this->image())
121:                 {
122:                     return $return;
123:                 }
124:                 else
125:                 {
126:                     return $official;
127:                 }
128:             }
129:             elseif ($official === 'text/html')
130:             {
131:                 return $this->feed_or_html();
132:             }
133:             else
134:             {
135:                 return $official;
136:             }
137:         }
138:         else
139:         {
140:             return $this->unknown();
141:         }
142:     }
143: 
144:     /**
145:      * Sniff text or binary
146:      *
147:      * @return string Actual Content-Type
148:      */
149:     public function text_or_binary()
150:     {
151:         if (substr($this->file->body, 0, 2) === "\xFE\xFF"
152:             || substr($this->file->body, 0, 2) === "\xFF\xFE"
153:             || substr($this->file->body, 0, 4) === "\x00\x00\xFE\xFF"
154:             || substr($this->file->body, 0, 3) === "\xEF\xBB\xBF")
155:         {
156:             return 'text/plain';
157:         }
158:         elseif (preg_match('/[\x00-\x08\x0E-\x1A\x1C-\x1F]/', $this->file->body))
159:         {
160:             return 'application/octect-stream';
161:         }
162:         else
163:         {
164:             return 'text/plain';
165:         }
166:     }
167: 
168:     /**
169:      * Sniff unknown
170:      *
171:      * @return string Actual Content-Type
172:      */
173:     public function unknown()
174:     {
175:         $ws = strspn($this->file->body, "\x09\x0A\x0B\x0C\x0D\x20");
176:         if (strtolower(substr($this->file->body, $ws, 14)) === '<!doctype html'
177:             || strtolower(substr($this->file->body, $ws, 5)) === '<html'
178:             || strtolower(substr($this->file->body, $ws, 7)) === '<script')
179:         {
180:             return 'text/html';
181:         }
182:         elseif (substr($this->file->body, 0, 5) === '%PDF-')
183:         {
184:             return 'application/pdf';
185:         }
186:         elseif (substr($this->file->body, 0, 11) === '%!PS-Adobe-')
187:         {
188:             return 'application/postscript';
189:         }
190:         elseif (substr($this->file->body, 0, 6) === 'GIF87a'
191:             || substr($this->file->body, 0, 6) === 'GIF89a')
192:         {
193:             return 'image/gif';
194:         }
195:         elseif (substr($this->file->body, 0, 8) === "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A")
196:         {
197:             return 'image/png';
198:         }
199:         elseif (substr($this->file->body, 0, 3) === "\xFF\xD8\xFF")
200:         {
201:             return 'image/jpeg';
202:         }
203:         elseif (substr($this->file->body, 0, 2) === "\x42\x4D")
204:         {
205:             return 'image/bmp';
206:         }
207:         elseif (substr($this->file->body, 0, 4) === "\x00\x00\x01\x00")
208:         {
209:             return 'image/vnd.microsoft.icon';
210:         }
211:         else
212:         {
213:             return $this->text_or_binary();
214:         }
215:     }
216: 
217:     /**
218:      * Sniff images
219:      *
220:      * @return string Actual Content-Type
221:      */
222:     public function image()
223:     {
224:         if (substr($this->file->body, 0, 6) === 'GIF87a'
225:             || substr($this->file->body, 0, 6) === 'GIF89a')
226:         {
227:             return 'image/gif';
228:         }
229:         elseif (substr($this->file->body, 0, 8) === "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A")
230:         {
231:             return 'image/png';
232:         }
233:         elseif (substr($this->file->body, 0, 3) === "\xFF\xD8\xFF")
234:         {
235:             return 'image/jpeg';
236:         }
237:         elseif (substr($this->file->body, 0, 2) === "\x42\x4D")
238:         {
239:             return 'image/bmp';
240:         }
241:         elseif (substr($this->file->body, 0, 4) === "\x00\x00\x01\x00")
242:         {
243:             return 'image/vnd.microsoft.icon';
244:         }
245:         else
246:         {
247:             return false;
248:         }
249:     }
250: 
251:     /**
252:      * Sniff HTML
253:      *
254:      * @return string Actual Content-Type
255:      */
256:     public function feed_or_html()
257:     {
258:         $len = strlen($this->file->body);
259:         $pos = strspn($this->file->body, "\x09\x0A\x0D\x20");
260: 
261:         while ($pos < $len)
262:         {
263:             switch ($this->file->body[$pos])
264:             {
265:                 case "\x09":
266:                 case "\x0A":
267:                 case "\x0D":
268:                 case "\x20":
269:                     $pos += strspn($this->file->body, "\x09\x0A\x0D\x20", $pos);
270:                     continue 2;
271: 
272:                 case '<':
273:                     $pos++;
274:                     break;
275: 
276:                 default:
277:                     return 'text/html';
278:             }
279: 
280:             if (substr($this->file->body, $pos, 3) === '!--')
281:             {
282:                 $pos += 3;
283:                 if ($pos < $len && ($pos = strpos($this->file->body, '-->', $pos)) !== false)
284:                 {
285:                     $pos += 3;
286:                 }
287:                 else
288:                 {
289:                     return 'text/html';
290:                 }
291:             }
292:             elseif (substr($this->file->body, $pos, 1) === '!')
293:             {
294:                 if ($pos < $len && ($pos = strpos($this->file->body, '>', $pos)) !== false)
295:                 {
296:                     $pos++;
297:                 }
298:                 else
299:                 {
300:                     return 'text/html';
301:                 }
302:             }
303:             elseif (substr($this->file->body, $pos, 1) === '?')
304:             {
305:                 if ($pos < $len && ($pos = strpos($this->file->body, '?>', $pos)) !== false)
306:                 {
307:                     $pos += 2;
308:                 }
309:                 else
310:                 {
311:                     return 'text/html';
312:                 }
313:             }
314:             elseif (substr($this->file->body, $pos, 3) === 'rss'
315:                 || substr($this->file->body, $pos, 7) === 'rdf:RDF')
316:             {
317:                 return 'application/rss+xml';
318:             }
319:             elseif (substr($this->file->body, $pos, 4) === 'feed')
320:             {
321:                 return 'application/atom+xml';
322:             }
323:             else
324:             {
325:                 return 'text/html';
326:             }
327:         }
328: 
329:         return 'text/html';
330:     }
331: }
332: 
333: 

Show some love! Wishlists for Geoffrey, Ryan P., and Ryan M.

SimplePie is © 2004–2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue and contributors. Licensed under the BSD License. Hosted thanks to Matt Mullenweg, API documentation generated by ApiGen 2.6.1. Variation on the Feed Icon by Wolfgang Bartelme.