SimplePie: PHP-based RSS and Atom feed handling
 
  • Overview
  • Demo
  • Blog
  • Download
  • Documentation
  • API Docs
  • Support
  • Issue Tracker
  • FAQ
  • Overview
  • Package
  • Class
  • Tree
  • Deprecated

Packages

  • SimplePie
    • API
    • Caching
    • HTTP
    • Parsing

Classes

  • SimplePie_Content_Type_Sniffer
  • SimplePie_File
  • SimplePie_gzdecode
  • SimplePie_HTTP_Parser
  • SimplePie_IRI
  • SimplePie_Net_IPv6
  1: <?php
  2: /**
  3:  * SimplePie
  4:  *
  5:  * A PHP-Based RSS and Atom Feed Framework.
  6:  * Takes the hard work out of managing a complete RSS/Atom solution.
  7:  *
  8:  * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
  9:  * All rights reserved.
 10:  *
 11:  * Redistribution and use in source and binary forms, with or without modification, are
 12:  * permitted provided that the following conditions are met:
 13:  *
 14:  *  * Redistributions of source code must retain the above copyright notice, this list of
 15:  *    conditions and the following disclaimer.
 16:  *
 17:  *  * Redistributions in binary form must reproduce the above copyright notice, this list
 18:  *    of conditions and the following disclaimer in the documentation and/or other materials
 19:  *    provided with the distribution.
 20:  *
 21:  *  * Neither the name of the SimplePie Team nor the names of its contributors may be used
 22:  *    to endorse or promote products derived from this software without specific prior
 23:  *    written permission.
 24:  *
 25:  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 26:  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 27:  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
 28:  * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 29:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 30:  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 31:  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 32:  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 33:  * POSSIBILITY OF SUCH DAMAGE.
 34:  *
 35:  * @package SimplePie
 36:  * @version 1.3
 37:  * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
 38:  * @author Ryan Parman
 39:  * @author Geoffrey Sneddon
 40:  * @author Ryan McCue
 41:  * @link http://simplepie.org/ SimplePie
 42:  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
 43:  */
 44: 
 45: 
 46: /**
 47:  * HTTP Response Parser
 48:  *
 49:  * @package SimplePie
 50:  * @subpackage HTTP
 51:  */
 52: class SimplePie_HTTP_Parser
 53: {
 54:     /**
 55:      * HTTP Version
 56:      *
 57:      * @var float
 58:      */
 59:     public $http_version = 0.0;
 60: 
 61:     /**
 62:      * Status code
 63:      *
 64:      * @var int
 65:      */
 66:     public $status_code = 0;
 67: 
 68:     /**
 69:      * Reason phrase
 70:      *
 71:      * @var string
 72:      */
 73:     public $reason = '';
 74: 
 75:     /**
 76:      * Key/value pairs of the headers
 77:      *
 78:      * @var array
 79:      */
 80:     public $headers = array();
 81: 
 82:     /**
 83:      * Body of the response
 84:      *
 85:      * @var string
 86:      */
 87:     public $body = '';
 88: 
 89:     /**
 90:      * Current state of the state machine
 91:      *
 92:      * @var string
 93:      */
 94:     protected $state = 'http_version';
 95: 
 96:     /**
 97:      * Input data
 98:      *
 99:      * @var string
100:      */
101:     protected $data = '';
102: 
103:     /**
104:      * Input data length (to avoid calling strlen() everytime this is needed)
105:      *
106:      * @var int
107:      */
108:     protected $data_length = 0;
109: 
110:     /**
111:      * Current position of the pointer
112:      *
113:      * @var int
114:      */
115:     protected $position = 0;
116: 
117:     /**
118:      * Name of the hedaer currently being parsed
119:      *
120:      * @var string
121:      */
122:     protected $name = '';
123: 
124:     /**
125:      * Value of the hedaer currently being parsed
126:      *
127:      * @var string
128:      */
129:     protected $value = '';
130: 
131:     /**
132:      * Create an instance of the class with the input data
133:      *
134:      * @param string $data Input data
135:      */
136:     public function __construct($data)
137:     {
138:         $this->data = $data;
139:         $this->data_length = strlen($this->data);
140:     }
141: 
142:     /**
143:      * Parse the input data
144:      *
145:      * @return bool true on success, false on failure
146:      */
147:     public function parse()
148:     {
149:         while ($this->state && $this->state !== 'emit' && $this->has_data())
150:         {
151:             $state = $this->state;
152:             $this->$state();
153:         }
154:         $this->data = '';
155:         if ($this->state === 'emit' || $this->state === 'body')
156:         {
157:             return true;
158:         }
159:         else
160:         {
161:             $this->http_version = '';
162:             $this->status_code = '';
163:             $this->reason = '';
164:             $this->headers = array();
165:             $this->body = '';
166:             return false;
167:         }
168:     }
169: 
170:     /**
171:      * Check whether there is data beyond the pointer
172:      *
173:      * @return bool true if there is further data, false if not
174:      */
175:     protected function has_data()
176:     {
177:         return (bool) ($this->position < $this->data_length);
178:     }
179: 
180:     /**
181:      * See if the next character is LWS
182:      *
183:      * @return bool true if the next character is LWS, false if not
184:      */
185:     protected function is_linear_whitespace()
186:     {
187:         return (bool) ($this->data[$this->position] === "\x09"
188:             || $this->data[$this->position] === "\x20"
189:             || ($this->data[$this->position] === "\x0A"
190:                 && isset($this->data[$this->position + 1])
191:                 && ($this->data[$this->position + 1] === "\x09" || $this->data[$this->position + 1] === "\x20")));
192:     }
193: 
194:     /**
195:      * Parse the HTTP version
196:      */
197:     protected function http_version()
198:     {
199:         if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/')
200:         {
201:             $len = strspn($this->data, '0123456789.', 5);
202:             $this->http_version = substr($this->data, 5, $len);
203:             $this->position += 5 + $len;
204:             if (substr_count($this->http_version, '.') <= 1)
205:             {
206:                 $this->http_version = (float) $this->http_version;
207:                 $this->position += strspn($this->data, "\x09\x20", $this->position);
208:                 $this->state = 'status';
209:             }
210:             else
211:             {
212:                 $this->state = false;
213:             }
214:         }
215:         else
216:         {
217:             $this->state = false;
218:         }
219:     }
220: 
221:     /**
222:      * Parse the status code
223:      */
224:     protected function status()
225:     {
226:         if ($len = strspn($this->data, '0123456789', $this->position))
227:         {
228:             $this->status_code = (int) substr($this->data, $this->position, $len);
229:             $this->position += $len;
230:             $this->state = 'reason';
231:         }
232:         else
233:         {
234:             $this->state = false;
235:         }
236:     }
237: 
238:     /**
239:      * Parse the reason phrase
240:      */
241:     protected function reason()
242:     {
243:         $len = strcspn($this->data, "\x0A", $this->position);
244:         $this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
245:         $this->position += $len + 1;
246:         $this->state = 'new_line';
247:     }
248: 
249:     /**
250:      * Deal with a new line, shifting data around as needed
251:      */
252:     protected function new_line()
253:     {
254:         $this->value = trim($this->value, "\x0D\x20");
255:         if ($this->name !== '' && $this->value !== '')
256:         {
257:             $this->name = strtolower($this->name);
258:             // We should only use the last Content-Type header. c.f. issue #1
259:             if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
260:             {
261:                 $this->headers[$this->name] .= ', ' . $this->value;
262:             }
263:             else
264:             {
265:                 $this->headers[$this->name] = $this->value;
266:             }
267:         }
268:         $this->name = '';
269:         $this->value = '';
270:         if (substr($this->data[$this->position], 0, 2) === "\x0D\x0A")
271:         {
272:             $this->position += 2;
273:             $this->state = 'body';
274:         }
275:         elseif ($this->data[$this->position] === "\x0A")
276:         {
277:             $this->position++;
278:             $this->state = 'body';
279:         }
280:         else
281:         {
282:             $this->state = 'name';
283:         }
284:     }
285: 
286:     /**
287:      * Parse a header name
288:      */
289:     protected function name()
290:     {
291:         $len = strcspn($this->data, "\x0A:", $this->position);
292:         if (isset($this->data[$this->position + $len]))
293:         {
294:             if ($this->data[$this->position + $len] === "\x0A")
295:             {
296:                 $this->position += $len;
297:                 $this->state = 'new_line';
298:             }
299:             else
300:             {
301:                 $this->name = substr($this->data, $this->position, $len);
302:                 $this->position += $len + 1;
303:                 $this->state = 'value';
304:             }
305:         }
306:         else
307:         {
308:             $this->state = false;
309:         }
310:     }
311: 
312:     /**
313:      * Parse LWS, replacing consecutive LWS characters with a single space
314:      */
315:     protected function linear_whitespace()
316:     {
317:         do
318:         {
319:             if (substr($this->data, $this->position, 2) === "\x0D\x0A")
320:             {
321:                 $this->position += 2;
322:             }
323:             elseif ($this->data[$this->position] === "\x0A")
324:             {
325:                 $this->position++;
326:             }
327:             $this->position += strspn($this->data, "\x09\x20", $this->position);
328:         } while ($this->has_data() && $this->is_linear_whitespace());
329:         $this->value .= "\x20";
330:     }
331: 
332:     /**
333:      * See what state to move to while within non-quoted header values
334:      */
335:     protected function value()
336:     {
337:         if ($this->is_linear_whitespace())
338:         {
339:             $this->linear_whitespace();
340:         }
341:         else
342:         {
343:             switch ($this->data[$this->position])
344:             {
345:                 case '"':
346:                     // Workaround for ETags: we have to include the quotes as
347:                     // part of the tag.
348:                     if (strtolower($this->name) === 'etag')
349:                     {
350:                         $this->value .= '"';
351:                         $this->position++;
352:                         $this->state = 'value_char';
353:                         break;
354:                     }
355:                     $this->position++;
356:                     $this->state = 'quote';
357:                     break;
358: 
359:                 case "\x0A":
360:                     $this->position++;
361:                     $this->state = 'new_line';
362:                     break;
363: 
364:                 default:
365:                     $this->state = 'value_char';
366:                     break;
367:             }
368:         }
369:     }
370: 
371:     /**
372:      * Parse a header value while outside quotes
373:      */
374:     protected function value_char()
375:     {
376:         $len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
377:         $this->value .= substr($this->data, $this->position, $len);
378:         $this->position += $len;
379:         $this->state = 'value';
380:     }
381: 
382:     /**
383:      * See what state to move to while within quoted header values
384:      */
385:     protected function quote()
386:     {
387:         if ($this->is_linear_whitespace())
388:         {
389:             $this->linear_whitespace();
390:         }
391:         else
392:         {
393:             switch ($this->data[$this->position])
394:             {
395:                 case '"':
396:                     $this->position++;
397:                     $this->state = 'value';
398:                     break;
399: 
400:                 case "\x0A":
401:                     $this->position++;
402:                     $this->state = 'new_line';
403:                     break;
404: 
405:                 case '\\':
406:                     $this->position++;
407:                     $this->state = 'quote_escaped';
408:                     break;
409: 
410:                 default:
411:                     $this->state = 'quote_char';
412:                     break;
413:             }
414:         }
415:     }
416: 
417:     /**
418:      * Parse a header value while within quotes
419:      */
420:     protected function quote_char()
421:     {
422:         $len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
423:         $this->value .= substr($this->data, $this->position, $len);
424:         $this->position += $len;
425:         $this->state = 'value';
426:     }
427: 
428:     /**
429:      * Parse an escaped character within quotes
430:      */
431:     protected function quote_escaped()
432:     {
433:         $this->value .= $this->data[$this->position];
434:         $this->position++;
435:         $this->state = 'quote';
436:     }
437: 
438:     /**
439:      * Parse the body
440:      */
441:     protected function body()
442:     {
443:         $this->body = substr($this->data, $this->position);
444:         if (!empty($this->headers['transfer-encoding']))
445:         {
446:             unset($this->headers['transfer-encoding']);
447:             $this->state = 'chunked';
448:         }
449:         else
450:         {
451:             $this->state = 'emit';
452:         }
453:     }
454: 
455:     /**
456:      * Parsed a "Transfer-Encoding: chunked" body
457:      */
458:     protected function chunked()
459:     {
460:         if (!preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', trim($this->body)))
461:         {
462:             $this->state = 'emit';
463:             return;
464:         }
465: 
466:         $decoded = '';
467:         $encoded = $this->body;
468: 
469:         while (true)
470:         {
471:             $is_chunked = (bool) preg_match( '/^([0-9a-f]+)[^\r\n]*\r\n/i', $encoded, $matches );
472:             if (!$is_chunked)
473:             {
474:                 // Looks like it's not chunked after all
475:                 $this->state = 'emit';
476:                 return;
477:             }
478: 
479:             $length = hexdec(trim($matches[1]));
480:             if ($length === 0)
481:             {
482:                 // Ignore trailer headers
483:                 $this->state = 'emit';
484:                 $this->body = $decoded;
485:                 return;
486:             }
487: 
488:             $chunk_length = strlen($matches[0]);
489:             $decoded .= $part = substr($encoded, $chunk_length, $length);
490:             $encoded = substr($encoded, $chunk_length + $length + 2);
491: 
492:             if (trim($encoded) === '0' || empty($encoded))
493:             {
494:                 $this->state = 'emit';
495:                 $this->body = $decoded;
496:                 return;
497:             }
498:         }
499:     }
500: }
501: 

Show some love! Wishlists for Geoffrey, Ryan P., and Ryan M.

SimplePie is © 2004–2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue and contributors. Licensed under the BSD License. Hosted thanks to Matt Mullenweg, API documentation generated by ApiGen 2.6.1. Variation on the Feed Icon by Wolfgang Bartelme.