SimplePie: PHP-based RSS and Atom feed handling
 
  • Overview
  • Demo
  • Blog
  • Download
  • Documentation
  • API Docs
  • Support
  • Issue Tracker
  • FAQ
  • Overview
  • Package
  • Class
  • Tree
  • Deprecated

Packages

  • SimplePie
    • API
    • Caching
    • HTTP
    • Parsing

Classes

  • SimplePie_Content_Type_Sniffer
  • SimplePie_File
  • SimplePie_gzdecode
  • SimplePie_HTTP_Parser
  • SimplePie_IRI
  • SimplePie_Net_IPv6
   1: <?php
   2: /**
   3:  * SimplePie
   4:  *
   5:  * A PHP-Based RSS and Atom Feed Framework.
   6:  * Takes the hard work out of managing a complete RSS/Atom solution.
   7:  *
   8:  * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
   9:  * All rights reserved.
  10:  *
  11:  * Redistribution and use in source and binary forms, with or without modification, are
  12:  * permitted provided that the following conditions are met:
  13:  *
  14:  *  * Redistributions of source code must retain the above copyright notice, this list of
  15:  *    conditions and the following disclaimer.
  16:  *
  17:  *  * Redistributions in binary form must reproduce the above copyright notice, this list
  18:  *    of conditions and the following disclaimer in the documentation and/or other materials
  19:  *    provided with the distribution.
  20:  *
  21:  *  * Neither the name of the SimplePie Team nor the names of its contributors may be used
  22:  *    to endorse or promote products derived from this software without specific prior
  23:  *    written permission.
  24:  *
  25:  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  26:  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  27:  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
  28:  * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30:  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  31:  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  32:  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33:  * POSSIBILITY OF SUCH DAMAGE.
  34:  *
  35:  * @package SimplePie
  36:  * @version 1.3
  37:  * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
  38:  * @author Ryan Parman
  39:  * @author Geoffrey Sneddon
  40:  * @author Ryan McCue
  41:  * @link http://simplepie.org/ SimplePie
  42:  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  43:  */
  44: 
  45: /**
  46:  * IRI parser/serialiser/normaliser
  47:  *
  48:  * @package SimplePie
  49:  * @subpackage HTTP
  50:  * @author Geoffrey Sneddon
  51:  * @author Steve Minutillo
  52:  * @author Ryan McCue
  53:  * @copyright 2007-2012 Geoffrey Sneddon, Steve Minutillo, Ryan McCue
  54:  * @license http://www.opensource.org/licenses/bsd-license.php
  55:  */
  56: class SimplePie_IRI
  57: {
    /**
     * Scheme
     *
     * Stored lower-cased by set_scheme(); null when no scheme is set.
     *
     * @var string|null
     */
    protected $scheme = null;

    /**
     * User Information
     *
     * Null when absent. Together with $ihost and $port this decides whether
     * the IRI is considered to have an authority (see is_valid()).
     *
     * @var string|null
     */
    protected $iuserinfo = null;

    /**
     * ihost
     *
     * Null when absent.
     *
     * @var string|null
     */
    protected $ihost = null;

    /**
     * Port
     *
     * Null when absent.
     *
     * NOTE(review): documented as a string, but the defaults in
     * $normalization are integers and scheme_normalization() compares them
     * with ===, so the stored value is presumably an integer - confirm
     * against the port setter.
     *
     * @var string
     */
    protected $port = null;

    /**
     * ipath
     *
     * Unlike the other components this is never null: an absent path is the
     * empty string.
     *
     * @var string
     */
    protected $ipath = '';

    /**
     * iquery
     *
     * Null when absent.
     *
     * @var string|null
     */
    protected $iquery = null;

    /**
     * ifragment
     *
     * Null when absent.
     *
     * @var string|null
     */
    protected $ifragment = null;

    /**
     * Normalization database
     *
     * Each key is the scheme, each value is an array with each key as the IRI
     * part and value as the default value for that part.
     *
     * Read by __get() (to report a default for a component that is null) and
     * by scheme_normalization() (to drop a component equal to its default).
     *
     * @var array
     */
    protected $normalization = array(
        'acap' => array(
            'port' => 674
        ),
        'dict' => array(
            'port' => 2628
        ),
        'file' => array(
            'ihost' => 'localhost'
        ),
        'http' => array(
            'port' => 80,
            'ipath' => '/'
        ),
        'https' => array(
            'port' => 443,
            'ipath' => '/'
        ),
    );
 132: 
 133:     /**
 134:      * Return the entire IRI when you try and read the object as a string
 135:      *
 136:      * @return string
 137:      */
 138:     public function __toString()
 139:     {
 140:         return $this->get_iri();
 141:     }
 142: 
 143:     /**
 144:      * Overload __set() to provide access via properties
 145:      *
 146:      * @param string $name Property name
 147:      * @param mixed $value Property value
 148:      */
 149:     public function __set($name, $value)
 150:     {
 151:         if (method_exists($this, 'set_' . $name))
 152:         {
 153:             call_user_func(array($this, 'set_' . $name), $value);
 154:         }
 155:         elseif (
 156:                $name === 'iauthority'
 157:             || $name === 'iuserinfo'
 158:             || $name === 'ihost'
 159:             || $name === 'ipath'
 160:             || $name === 'iquery'
 161:             || $name === 'ifragment'
 162:         )
 163:         {
 164:             call_user_func(array($this, 'set_' . substr($name, 1)), $value);
 165:         }
 166:     }
 167: 
 168:     /**
 169:      * Overload __get() to provide access via properties
 170:      *
 171:      * @param string $name Property name
 172:      * @return mixed
 173:      */
 174:     public function __get($name)
 175:     {
 176:         // isset() returns false for null, we don't want to do that
 177:         // Also why we use array_key_exists below instead of isset()
 178:         $props = get_object_vars($this);
 179: 
 180:         if (
 181:             $name === 'iri' ||
 182:             $name === 'uri' ||
 183:             $name === 'iauthority' ||
 184:             $name === 'authority'
 185:         )
 186:         {
 187:             $return = $this->{"get_$name"}();
 188:         }
 189:         elseif (array_key_exists($name, $props))
 190:         {
 191:             $return = $this->$name;
 192:         }
 193:         // host -> ihost
 194:         elseif (($prop = 'i' . $name) && array_key_exists($prop, $props))
 195:         {
 196:             $name = $prop;
 197:             $return = $this->$prop;
 198:         }
 199:         // ischeme -> scheme
 200:         elseif (($prop = substr($name, 1)) && array_key_exists($prop, $props))
 201:         {
 202:             $name = $prop;
 203:             $return = $this->$prop;
 204:         }
 205:         else
 206:         {
 207:             trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
 208:             $return = null;
 209:         }
 210: 
 211:         if ($return === null && isset($this->normalization[$this->scheme][$name]))
 212:         {
 213:             return $this->normalization[$this->scheme][$name];
 214:         }
 215:         else
 216:         {
 217:             return $return;
 218:         }
 219:     }
 220: 
 221:     /**
 222:      * Overload __isset() to provide access via properties
 223:      *
 224:      * @param string $name Property name
 225:      * @return bool
 226:      */
 227:     public function __isset($name)
 228:     {
 229:         if (method_exists($this, 'get_' . $name) || isset($this->$name))
 230:         {
 231:             return true;
 232:         }
 233:         else
 234:         {
 235:             return false;
 236:         }
 237:     }
 238: 
 239:     /**
 240:      * Overload __unset() to provide access via properties
 241:      *
 242:      * @param string $name Property name
 243:      */
 244:     public function __unset($name)
 245:     {
 246:         if (method_exists($this, 'set_' . $name))
 247:         {
 248:             call_user_func(array($this, 'set_' . $name), '');
 249:         }
 250:     }
 251: 
    /**
     * Create a new IRI object, from a specified string
     *
     * The argument is passed straight to set_iri(). Its boolean result is
     * not inspected here, so construction itself never reports a parse
     * failure; callers can check is_valid() afterwards.
     *
     * @param string|null $iri IRI to parse. With the default null, set_iri()
     *                         returns immediately and every component keeps
     *                         its declared default.
     */
    public function __construct($iri = null)
    {
        $this->set_iri($iri);
    }
 261: 
    /**
     * Create a new IRI object by resolving a relative IRI
     *
     * Returns false when $relative is not valid, or when $relative has no
     * scheme and $base either has no scheme or is not valid. Otherwise
     * returns a new IRI object; neither argument is modified.
     *
     * A $relative that already carries a scheme is returned as a clone,
     * without consulting $base and without scheme normalization.
     *
     * @param SimplePie_IRI|string $base (Absolute) Base IRI
     * @param SimplePie_IRI|string $relative Relative IRI
     * @return SimplePie_IRI|false
     */
    public static function absolutize($base, $relative)
    {
        // Accept plain strings by parsing them first
        if (!($relative instanceof SimplePie_IRI))
        {
            $relative = new SimplePie_IRI($relative);
        }
        if (!$relative->is_valid())
        {
            return false;
        }
        elseif ($relative->scheme !== null)
        {
            // Already absolute: nothing to resolve
            return clone $relative;
        }
        else
        {
            if (!($base instanceof SimplePie_IRI))
            {
                $base = new SimplePie_IRI($base);
            }
            if ($base->scheme !== null && $base->is_valid())
            {
                if ($relative->get_iri() !== '')
                {
                    // Relative reference with its own authority: only the scheme comes from the base
                    if ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null)
                    {
                        $target = clone $relative;
                        $target->scheme = $base->scheme;
                    }
                    else
                    {
                        // Scheme and authority are inherited from the base
                        $target = new SimplePie_IRI;
                        $target->scheme = $base->scheme;
                        $target->iuserinfo = $base->iuserinfo;
                        $target->ihost = $base->ihost;
                        $target->port = $base->port;
                        if ($relative->ipath !== '')
                        {
                            // Absolute path: replaces the base path entirely
                            if ($relative->ipath[0] === '/')
                            {
                                $target->ipath = $relative->ipath;
                            }
                            // Base has an authority but an empty path: root the relative path
                            elseif (($base->iuserinfo !== null || $base->ihost !== null || $base->port !== null) && $base->ipath === '')
                            {
                                $target->ipath = '/' . $relative->ipath;
                            }
                            // Otherwise replace everything after the last "/" of the base path
                            elseif (($last_segment = strrpos($base->ipath, '/')) !== false)
                            {
                                $target->ipath = substr($base->ipath, 0, $last_segment + 1) . $relative->ipath;
                            }
                            // Base path has no "/" at all: the relative path stands alone
                            else
                            {
                                $target->ipath = $relative->ipath;
                            }
                            $target->ipath = $target->remove_dot_segments($target->ipath);
                            // With a non-empty relative path the query always comes from the relative reference (may be null)
                            $target->iquery = $relative->iquery;
                        }
                        else
                        {
                            // Empty relative path: keep the base path, and the base query unless the reference supplies one
                            $target->ipath = $base->ipath;
                            if ($relative->iquery !== null)
                            {
                                $target->iquery = $relative->iquery;
                            }
                            elseif ($base->iquery !== null)
                            {
                                $target->iquery = $base->iquery;
                            }
                        }
                        // The fragment is never inherited from the base
                        $target->ifragment = $relative->ifragment;
                    }
                }
                else
                {
                    // Empty reference: the base itself, minus its fragment
                    $target = clone $base;
                    $target->ifragment = null;
                }
                $target->scheme_normalization();
                return $target;
            }
            else
            {
                return false;
            }
        }
    }
 357: 
 358:     /**
 359:      * Parse an IRI into scheme/authority/path/query/fragment segments
 360:      *
 361:      * @param string $iri
 362:      * @return array
 363:      */
 364:     protected function parse_iri($iri)
 365:     {
 366:         $iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
 367:         if (preg_match('/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/', $iri, $match))
 368:         {
 369:             if ($match[1] === '')
 370:             {
 371:                 $match['scheme'] = null;
 372:             }
 373:             if (!isset($match[3]) || $match[3] === '')
 374:             {
 375:                 $match['authority'] = null;
 376:             }
 377:             if (!isset($match[5]))
 378:             {
 379:                 $match['path'] = '';
 380:             }
 381:             if (!isset($match[6]) || $match[6] === '')
 382:             {
 383:                 $match['query'] = null;
 384:             }
 385:             if (!isset($match[8]) || $match[8] === '')
 386:             {
 387:                 $match['fragment'] = null;
 388:             }
 389:             return $match;
 390:         }
 391:         else
 392:         {
 393:             trigger_error('This should never happen', E_USER_ERROR);
 394:             die;
 395:         }
 396:     }
 397: 
 398:     /**
 399:      * Remove dot segments from a path
 400:      *
 401:      * @param string $input
 402:      * @return string
 403:      */
 404:     protected function remove_dot_segments($input)
 405:     {
 406:         $output = '';
 407:         while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
 408:         {
 409:             // A: If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise,
 410:             if (strpos($input, '../') === 0)
 411:             {
 412:                 $input = substr($input, 3);
 413:             }
 414:             elseif (strpos($input, './') === 0)
 415:             {
 416:                 $input = substr($input, 2);
 417:             }
 418:             // B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise,
 419:             elseif (strpos($input, '/./') === 0)
 420:             {
 421:                 $input = substr($input, 2);
 422:             }
 423:             elseif ($input === '/.')
 424:             {
 425:                 $input = '/';
 426:             }
 427:             // C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise,
 428:             elseif (strpos($input, '/../') === 0)
 429:             {
 430:                 $input = substr($input, 3);
 431:                 $output = substr_replace($output, '', strrpos($output, '/'));
 432:             }
 433:             elseif ($input === '/..')
 434:             {
 435:                 $input = '/';
 436:                 $output = substr_replace($output, '', strrpos($output, '/'));
 437:             }
 438:             // D: if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise,
 439:             elseif ($input === '.' || $input === '..')
 440:             {
 441:                 $input = '';
 442:             }
 443:             // E: move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer
 444:             elseif (($pos = strpos($input, '/', 1)) !== false)
 445:             {
 446:                 $output .= substr($input, 0, $pos);
 447:                 $input = substr_replace($input, '', 0, $pos);
 448:             }
 449:             else
 450:             {
 451:                 $output .= $input;
 452:                 $input = '';
 453:             }
 454:         }
 455:         return $output . $input;
 456:     }
 457: 
    /**
     * Replace invalid character with percent encoding
     *
     * Works in three passes: (1) runs of percent-encoded bytes are handed to
     * remove_iunreserved_percent_encoded(), (2) any "%" not followed by two
     * hex digits becomes "%25", (3) the string is scanned byte by byte and
     * every byte sequence that is neither in the allowed ASCII set nor a
     * well-formed UTF-8 character in the permitted code point ranges is
     * replaced, in place, by its "%XX" form (upper-case hex).
     *
     * @param string $string Input string
     * @param string $extra_chars Valid characters not in iunreserved or
     *                            iprivate (this is ASCII-only)
     * @param bool $iprivate Allow iprivate
     * @return string
     */
    protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
    {
        // Normalize as many pct-encoded sections as possible
        $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string);

        // Replace invalid percent characters
        $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

        // Add unreserved and % to $extra_chars (the latter is safe because all
        // pct-encoded sections are now valid).
        $extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

        // Now replace any bytes that aren't allowed with their pct-encoded versions
        // (strspn() skips over the run of allowed bytes, so each iteration
        // starts on the first byte that is not in $extra_chars)
        $position = 0;
        $strlen = strlen($string);
        while (($position += strspn($string, $extra_chars, $position)) < $strlen)
        {
            $value = ord($string[$position]);

            // Start position
            $start = $position;

            // By default we are valid
            $valid = true;

            // No one byte sequences are valid due to the while.
            // Two byte sequence:
            if (($value & 0xE0) === 0xC0)
            {
                $character = ($value & 0x1F) << 6;
                $length = 2;
                $remaining = 1;
            }
            // Three byte sequence:
            elseif (($value & 0xF0) === 0xE0)
            {
                $character = ($value & 0x0F) << 12;
                $length = 3;
                $remaining = 2;
            }
            // Four byte sequence:
            elseif (($value & 0xF8) === 0xF0)
            {
                $character = ($value & 0x07) << 18;
                $length = 4;
                $remaining = 3;
            }
            // Invalid byte:
            // ($character is not assigned here; the test further down checks
            // !$valid first, so it is never read for this case)
            else
            {
                $valid = false;
                $length = 1;
                $remaining = 0;
            }

            if ($remaining)
            {
                // Only decode when the whole sequence fits in the string
                if ($position + $length <= $strlen)
                {
                    // On normal completion $position ends one past the last byte of the sequence
                    for ($position++; $remaining; $position++)
                    {
                        $value = ord($string[$position]);

                        // Check that the byte is valid, then add it to the character:
                        if (($value & 0xC0) === 0x80)
                        {
                            $character |= ($value & 0x3F) << (--$remaining * 6);
                        }
                        // If it is invalid, count the sequence as invalid and reprocess the current byte:
                        else
                        {
                            $valid = false;
                            $position--;
                            break;
                        }
                    }
                }
                else
                {
                    // Truncated sequence at the end of the string: everything
                    // from $start to the last byte is treated as invalid
                    $position = $strlen - 1;
                    $valid = false;
                }
            }

            // Percent encode anything invalid or not in ucschar
            if (
                // Invalid sequences
                !$valid
                // Non-shortest form sequences are invalid
                || $length > 1 && $character <= 0x7F
                || $length > 2 && $character <= 0x7FF
                || $length > 3 && $character <= 0xFFFF
                // Outside of range of ucschar codepoints
                // Noncharacters
                || ($character & 0xFFFE) === 0xFFFE
                || $character >= 0xFDD0 && $character <= 0xFDEF
                || (
                    // Everything else not in ucschar
                       $character > 0xD7FF && $character < 0xF900
                    || $character < 0xA0
                    || $character > 0xEFFFD
                )
                && (
                    // Everything not in iprivate, if it applies
                       !$iprivate
                    || $character < 0xE000
                    || $character > 0x10FFFD
                )
            )
            {
                // If we were a character, pretend we weren't, but rather an error.
                // (steps back from one-past-the-end to the last byte of the sequence)
                if ($valid)
                    $position--;

                // Each byte grows from one character to three ("%XX"), so the
                // loop index, the scan position and the cached length all
                // advance by two extra per replaced byte.
                for ($j = $start; $j <= $position; $j++)
                {
                    $string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
                    $j += 2;
                    $position += 2;
                    $strlen += 2;
                }
            }
        }

        return $string;
    }
 593: 
    /**
     * Callback function for preg_replace_callback.
     *
     * Removes sequences of percent encoded bytes that represent UTF-8
     * encoded characters in iunreserved
     *
     * The matched run of "%XX" triplets is decoded as UTF-8. Each complete
     * character that falls in the accepted ranges is emitted as raw bytes;
     * every other byte sequence is re-emitted percent-encoded with its hex
     * digits upper-cased.
     *
     * @param array $match PCRE match
     * @return string Replacement
     */
    protected function remove_iunreserved_percent_encoded($match)
    {
        // As we just have valid percent encoded sequences we can just explode
        // and ignore the first member of the returned array (an empty string).
        $bytes = explode('%', $match[0]);

        // Initialize the new string (this is what will be returned) and that
        // there are no bytes remaining in the current sequence (unsurprising
        // at the first byte!).
        $string = '';
        $remaining = 0;

        // Loop over each and every byte, and set $value to its value
        for ($i = 1, $len = count($bytes); $i < $len; $i++)
        {
            $value = hexdec($bytes[$i]);

            // If we're the first byte of sequence:
            if (!$remaining)
            {
                // Start position
                $start = $i;

                // By default we are valid
                $valid = true;

                // One byte sequence:
                if ($value <= 0x7F)
                {
                    $character = $value;
                    $length = 1;
                }
                // Two byte sequence:
                elseif (($value & 0xE0) === 0xC0)
                {
                    $character = ($value & 0x1F) << 6;
                    $length = 2;
                    $remaining = 1;
                }
                // Three byte sequence:
                elseif (($value & 0xF0) === 0xE0)
                {
                    $character = ($value & 0x0F) << 12;
                    $length = 3;
                    $remaining = 2;
                }
                // Four byte sequence:
                elseif (($value & 0xF8) === 0xF0)
                {
                    $character = ($value & 0x07) << 18;
                    $length = 4;
                    $remaining = 3;
                }
                // Invalid byte:
                // ($length and $character are not assigned here; the test
                // below checks !$valid first, so they are not read)
                else
                {
                    $valid = false;
                    $remaining = 0;
                }
            }
            // Continuation byte:
            else
            {
                // Check that the byte is valid, then add it to the character:
                if (($value & 0xC0) === 0x80)
                {
                    $remaining--;
                    $character |= ($value & 0x3F) << ($remaining * 6);
                }
                // If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
                else
                {
                    $valid = false;
                    $remaining = 0;
                    $i--;
                }
            }

            // If we've reached the end of the current byte sequence, append it to $string
            if (!$remaining)
            {
                // Percent encode anything invalid or not in iunreserved
                if (
                    // Invalid sequences
                    !$valid
                    // Non-shortest form sequences are invalid
                    || $length > 1 && $character <= 0x7F
                    || $length > 2 && $character <= 0x7FF
                    || $length > 3 && $character <= 0xFFFF
                    // Outside of range of iunreserved codepoints
                    || $character < 0x2D
                    || $character > 0xEFFFD
                    // Noncharacters
                    || ($character & 0xFFFE) === 0xFFFE
                    || $character >= 0xFDD0 && $character <= 0xFDEF
                    // Everything else not in iunreserved (this is all BMP)
                    || $character === 0x2F
                    || $character > 0x39 && $character < 0x41
                    || $character > 0x5A && $character < 0x61
                    || $character > 0x7A && $character < 0x7E
                    || $character > 0x7E && $character < 0xA0
                    || $character > 0xD7FF && $character < 0xF900
                )
                {
                    // Keep the bytes encoded, normalising the hex digits to upper case
                    for ($j = $start; $j <= $i; $j++)
                    {
                        $string .= '%' . strtoupper($bytes[$j]);
                    }
                }
                else
                {
                    // Emit the decoded bytes of the character
                    for ($j = $start; $j <= $i; $j++)
                    {
                        $string .= chr(hexdec($bytes[$j]));
                    }
                }
            }
        }

        // If we have any bytes left over they are invalid (i.e., we are
        // mid-way through a multi-byte sequence)
        if ($remaining)
        {
            for ($j = $start; $j < $len; $j++)
            {
                $string .= '%' . strtoupper($bytes[$j]);
            }
        }

        return $string;
    }
 734: 
 735:     protected function scheme_normalization()
 736:     {
 737:         if (isset($this->normalization[$this->scheme]['iuserinfo']) && $this->iuserinfo === $this->normalization[$this->scheme]['iuserinfo'])
 738:         {
 739:             $this->iuserinfo = null;
 740:         }
 741:         if (isset($this->normalization[$this->scheme]['ihost']) && $this->ihost === $this->normalization[$this->scheme]['ihost'])
 742:         {
 743:             $this->ihost = null;
 744:         }
 745:         if (isset($this->normalization[$this->scheme]['port']) && $this->port === $this->normalization[$this->scheme]['port'])
 746:         {
 747:             $this->port = null;
 748:         }
 749:         if (isset($this->normalization[$this->scheme]['ipath']) && $this->ipath === $this->normalization[$this->scheme]['ipath'])
 750:         {
 751:             $this->ipath = '';
 752:         }
 753:         if (isset($this->normalization[$this->scheme]['iquery']) && $this->iquery === $this->normalization[$this->scheme]['iquery'])
 754:         {
 755:             $this->iquery = null;
 756:         }
 757:         if (isset($this->normalization[$this->scheme]['ifragment']) && $this->ifragment === $this->normalization[$this->scheme]['ifragment'])
 758:         {
 759:             $this->ifragment = null;
 760:         }
 761:     }
 762: 
 763:     /**
 764:      * Check if the object represents a valid IRI. This needs to be done on each
 765:      * call as some things change depending on another part of the IRI.
 766:      *
 767:      * @return bool
 768:      */
 769:     public function is_valid()
 770:     {
 771:         $isauthority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;
 772:         if ($this->ipath !== '' &&
 773:             (
 774:                 $isauthority && (
 775:                     $this->ipath[0] !== '/' ||
 776:                     substr($this->ipath, 0, 2) === '//'
 777:                 ) ||
 778:                 (
 779:                     $this->scheme === null &&
 780:                     !$isauthority &&
 781:                     strpos($this->ipath, ':') !== false &&
 782:                     (strpos($this->ipath, '/') === false ? true : strpos($this->ipath, ':') < strpos($this->ipath, '/'))
 783:                 )
 784:             )
 785:         )
 786:         {
 787:             return false;
 788:         }
 789: 
 790:         return true;
 791:     }
 792: 
 793:     /**
 794:      * Set the entire IRI. Returns true on success, false on failure (if there
 795:      * are any invalid characters).
 796:      *
 797:      * Results are memoised in a static cache keyed by the IRI cast to a string,
 798:      * so the key always matches the text that was actually parsed.
 799:      *
 800:      * @param string|null $iri IRI to parse; null leaves the object untouched
 801:      * @return bool
 802:      */
 803:     public function set_iri($iri)
 804:     {
 805:         static $cache = array();
 806:         if ($iri === null)
 807:         {
 808:             return true;
 809:         }
 810: 
 811:         // PHP coerces non-string array keys (1.5 and true both become 1), so
 812:         // cast once and use the same string for the cache and for parsing.
 813:         $iri = (string) $iri;
 814:         if (isset($cache[$iri]))
 815:         {
 816:             list($this->scheme,
 817:                  $this->iuserinfo,
 818:                  $this->ihost,
 819:                  $this->port,
 820:                  $this->ipath,
 821:                  $this->iquery,
 822:                  $this->ifragment,
 823:                  $return) = $cache[$iri];
 824:             return $return;
 825:         }
 826: 
 827:         $parsed = $this->parse_iri($iri);
 828: 
 829:         $return = $this->set_scheme($parsed['scheme'])
 830:             && $this->set_authority($parsed['authority'])
 831:             && $this->set_path($parsed['path'])
 832:             && $this->set_query($parsed['query'])
 833:             && $this->set_fragment($parsed['fragment']);
 834: 
 835:         $cache[$iri] = array($this->scheme,
 836:                              $this->iuserinfo,
 837:                              $this->ihost,
 838:                              $this->port,
 839:                              $this->ipath,
 840:                              $this->iquery,
 841:                              $this->ifragment,
 842:                              $return);
 843:         return $return;
 844:     }
 845: 
 846:     /**
 847:      * Set the scheme. Returns true on success, false on failure (if there are
 848:      * any invalid characters).
 849:      *
 850:      * A valid scheme starts with an ASCII letter followed by letters, digits,
 851:      * "+", "-" or "." and is stored lowercased. On failure the stored scheme is
 852:      * reset to null.
 853:      *
 854:      * @param string|null $scheme
 855:      * @return bool
 856:      */
 857:     public function set_scheme($scheme)
 858:     {
 859:         // The D modifier stops "$" from matching before a trailing newline, so a
 860:         // value such as "http\n" is rejected instead of being stored verbatim.
 861:         if ($scheme !== null && !preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/D', $scheme))
 862:         {
 863:             $this->scheme = null;
 864:             return false;
 865:         }
 866: 
 867:         $this->scheme = ($scheme === null) ? null : strtolower($scheme);
 868:         return true;
 869:     }
 870: 
 871:     /**
 872:      * Set the authority. Returns true on success, false on failure (if there are
 873:      * any invalid characters).
 874:      *
 875:      * The authority is split into userinfo (before the last "@"), host and port
 876:      * (after the first ":" following any "[...]" IP literal). An empty port, as
 877:      * in "example.com:", is treated as no port. Results are memoised statically.
 878:      *
 879:      * @param string|null $authority
 880:      * @return bool
 881:      */
 882:     public function set_authority($authority)
 883:     {
 884:         static $cache = array();
 885: 
 886:         if ($authority === null)
 887:         {
 888:             $this->iuserinfo = null;
 889:             $this->ihost = null;
 890:             $this->port = null;
 891:             return true;
 892:         }
 893: 
 894:         if (isset($cache[$authority]))
 895:         {
 896:             list($this->iuserinfo,
 897:                  $this->ihost,
 898:                  $this->port,
 899:                  $return) = $cache[$authority];
 900:             return $return;
 901:         }
 902: 
 903:         $remaining = $authority;
 904:         $iuserinfo = null;
 905:         if (($iuserinfo_end = strrpos($remaining, '@')) !== false)
 906:         {
 907:             $iuserinfo = substr($remaining, 0, $iuserinfo_end);
 908:             $remaining = substr($remaining, $iuserinfo_end + 1);
 909:         }
 910: 
 911:         // Search for the port separator after any "]" so that the colons inside an
 912:         // IP literal are skipped; without a "]" the search starts at offset 0.
 913:         $port = null;
 914:         $port_start = strpos($remaining, ':', (int) strpos($remaining, ']'));
 915:         if ($port_start !== false)
 916:         {
 917:             // substr() yields '' (false before PHP 7) when nothing follows the
 918:             // colon; in both cases there is no port to set.
 919:             $port = substr($remaining, $port_start + 1);
 920:             if ($port === false || $port === '')
 921:             {
 922:                 $port = null;
 923:             }
 924:             $remaining = substr($remaining, 0, $port_start);
 925:         }
 926: 
 927:         $return = $this->set_userinfo($iuserinfo) &&
 928:                   $this->set_host($remaining) &&
 929:                   $this->set_port($port);
 930: 
 931:         $cache[$authority] = array($this->iuserinfo,
 932:                                    $this->ihost,
 933:                                    $this->port,
 934:                                    $return);
 935:         return $return;
 936:     }
 937: 
 938:     /**
 939:      * Set the iuserinfo. Null clears the component; any other value is passed
 940:      * through replace_invalid_with_pct_encoding() and scheme normalization is
 941:      * then re-run.
 942:      *
 943:      * @param string|null $iuserinfo
 944:      * @return bool Always true
 945:      */
 946:     public function set_userinfo($iuserinfo)
 947:     {
 948:         if ($iuserinfo !== null)
 949:         {
 950:             $allowed = '!$&\'()*+,;=:';
 951:             $this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, $allowed);
 952:             $this->scheme_normalization();
 953:             return true;
 954:         }
 955:         $this->iuserinfo = null;
 956:         return true;
 957:     }
 958: 
 959:     /**
 960:      * Set the ihost. Returns true on success, false on failure (if there are
 961:      * any invalid characters).
 962:      *
 963:      * A value wrapped in "[" and "]" is checked with SimplePie_Net_IPv6 and
 964:      * stored in compressed form; anything else is pct-encoded and lowercased.
 965:      *
 966:      * @param string|null $ihost
 967:      * @return bool
 968:      */
 969:     public function set_host($ihost)
 970:     {
 971:         if ($ihost === null)
 972:         {
 973:             $this->ihost = null;
 974:             return true;
 975:         }
 976: 
 977:         if (substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']')
 978:         {
 979:             $address = substr($ihost, 1, -1);
 980:             if (!SimplePie_Net_IPv6::check_ipv6($address))
 981:             {
 982:                 $this->ihost = null;
 983:                 return false;
 984:             }
 985:             $this->ihost = '[' . SimplePie_Net_IPv6::compress($address) . ']';
 986:         }
 987:         else
 988:         {
 989:             $ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');
 990:             // Lowercase ASCII letters one at a time, jumping over each
 991:             // three-character pct-encoded sequence so its hex digits keep their
 992:             // case. This has to follow the encoding step above.
 993:             $length = strlen($ihost);
 994:             $index = strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%');
 995:             while ($index < $length)
 996:             {
 997:                 if ($ihost[$index] === '%')
 998:                 {
 999:                     $index += 3;
1000:                 }
1001:                 else
1002:                 {
1003:                     $ihost[$index] = strtolower($ihost[$index]);
1004:                     $index++;
1005:                 }
1006:                 $index += strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $index);
1007:             }
1008:             $this->ihost = $ihost;
1009:         }
1010: 
1011:         $this->scheme_normalization();
1012:         return true;
1013:     }
1014: 
1015:     /**
1016:      * Set the port. Returns true on success, false on failure (if there are
1017:      * any invalid characters). Null or an empty string (as in "example.com:")
1018:      * means no explicit port and clears the stored value.
1019:      *
1020:      * @param string|null $port Decimal digits only
1021:      * @return bool
1022:      */
1023:     public function set_port($port)
1024:     {
1025:         // An empty port is allowed by the grammar (port = *DIGIT) and denotes
1026:         // the default port, so it must not be cast to the integer 0.
1027:         if ($port === null || $port === '')
1028:         {
1029:             $this->port = null;
1030:             return true;
1031:         }
1032:         if (strspn($port, '0123456789') !== strlen($port))
1033:         {
1034:             $this->port = null;
1035:             return false;
1036:         }
1037:         $this->port = (int) $port;
1038:         $this->scheme_normalization();
1039:         return true;
1040:     }
1041: 
1042:     /**
1043:      * Set the ipath.
1044:      *
1045:      * Both the pct-encoded path and the variant with dot segments removed are
1046:      * memoised per input string; the latter is used when a scheme is set.
1047:      *
1048:      * @param string $ipath
1049:      * @return bool Always true
1050:      */
1051:     public function set_path($ipath)
1052:     {
1053:         static $cache = array();
1054: 
1055:         $ipath = (string) $ipath;
1056: 
1057:         // Each cache entry holds the pct-encoded path at index 0 and the same
1058:         // path with its dot segments removed at index 1.
1059:         if (!isset($cache[$ipath]))
1060:         {
1061:             $encoded = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
1062:             $resolved = $this->remove_dot_segments($encoded);
1063:             $cache[$ipath] = array($encoded, $resolved);
1064:         }
1065: 
1066:         // Dot segments are only removed when a scheme is present.
1067:         $variant = ($this->scheme !== null) ? 1 : 0;
1068:         $this->ipath = $cache[$ipath][$variant];
1069: 
1070:         $this->scheme_normalization();
1071: 
1072:         return true;
1073:     }
1074: 
1075:     /**
1076:      * Set the iquery. Null clears the component; any other value is passed
1077:      * through replace_invalid_with_pct_encoding() (with its third argument set
1078:      * to true) and scheme normalization is then re-run.
1079:      *
1080:      * @param string|null $iquery
1081:      * @return bool Always true
1082:      */
1083:     public function set_query($iquery)
1084:     {
1085:         if ($iquery === null)
1086:         {
1087:             $this->iquery = null;
1088:             return true;
1089:         }
1090:         $this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
1091:         $this->scheme_normalization();
1092:         return true;
1093:     }
1094: 
1095:     /**
1096:      * Set the ifragment. Null clears the component; any other value is passed
1097:      * through replace_invalid_with_pct_encoding() and scheme normalization is
1098:      * then re-run.
1099:      *
1100:      * @param string|null $ifragment
1101:      * @return bool Always true
1102:      */
1103:     public function set_fragment($ifragment)
1104:     {
1105:         if ($ifragment === null)
1106:         {
1107:             $this->ifragment = null;
1108:             return true;
1109:         }
1110:         $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
1111:         $this->scheme_normalization();
1112:         return true;
1113:     }
1114: 
1115:     /**
1116:      * Convert an IRI to a URI (or parts thereof): bytes 0x80-0xFF become "%XX"
1117:      *
1118:      * @param string $string
1119:      * @return string
1120:      */
1121:     public function to_uri($string)
1122:     {
1123:         static $non_ascii = null;
1124:         if ($non_ascii === null)
1125:         {
1126:             $non_ascii = implode('', range("\x80", "\xFF"));
1127:         }
1128: 
1129:         $length = strlen($string);
1130:         $offset = strcspn($string, $non_ascii);
1131:         while ($offset < $length)
1132:         {
1133:             $string = substr_replace($string, sprintf('%%%02X', ord($string[$offset])), $offset, 1);
1134:             $length += 2;
1135:             $offset += 3 + strcspn($string, $non_ascii, $offset + 3);
1136:         }
1137:         return $string;
1138:     }
1139: 
1140:     /**
1141:      * Get the complete IRI
1142:      *
1143:      * The components are recomposed in the order scheme, authority, path, query
1144:      * and fragment. An empty path is replaced by the scheme's default path from
1145:      * the normalization table when there is a non-empty authority.
1146:      *
1147:      * @return string|false The IRI, or false if is_valid() fails
1148:      */
1149:     public function get_iri()
1150:     {
1151:         if (!$this->is_valid())
1152:         {
1153:             return false;
1154:         }
1155: 
1156:         $iauthority = $this->get_iauthority();
1157:         $has_authority = $iauthority !== null && $iauthority !== '';
1158: 
1159:         // Optional "scheme:" and "//authority" prefixes.
1160:         $iri = ($this->scheme !== null) ? $this->scheme . ':' : '';
1161:         $iri .= ($iauthority !== null) ? '//' . $iauthority : '';
1162: 
1163:         if ($this->ipath !== '')
1164:         {
1165:             $iri .= $this->ipath;
1166:         }
1167:         elseif (!empty($this->normalization[$this->scheme]['ipath']) && $has_authority)
1168:         {
1169:             // No path of our own: use the default path registered for this
1170:             // scheme, which only applies next to a non-empty authority.
1171:             $iri .= $this->normalization[$this->scheme]['ipath'];
1172:         }
1173: 
1174:         // Optional "?query" and "#fragment" suffixes.
1175:         $iri .= ($this->iquery !== null) ? '?' . $this->iquery : '';
1176:         $iri .= ($this->ifragment !== null) ? '#' . $this->ifragment : '';
1177: 
1178:         return $iri;
1179:     }
1180: 
1181:     /**
1182:      * Get the complete URI, i.e. the output of get_iri() run through to_uri()
1183:      * @return string
1184:      */
1185:     public function get_uri()
1186:     {
1187:         $iri = $this->get_iri();
1188:         return $this->to_uri($iri);
1189:     }
1190: 
1191:     /**
1192:      * Get the complete iauthority
1193:      *
1194:      * Joins the parts that are set as "iuserinfo@ihost:port".
1195:      *
1196:      * @return string|null Null when iuserinfo, ihost and port are all null
1197:      */
1198:     protected function get_iauthority()
1199:     {
1200:         if ($this->iuserinfo === null && $this->ihost === null && $this->port === null)
1201:         {
1202:             return null;
1203:         }
1204: 
1205:         $iauthority = '';
1206:         if ($this->iuserinfo !== null)
1207:         {
1208:             $iauthority .= $this->iuserinfo . '@';
1209:         }
1210:         if ($this->ihost !== null)
1211:         {
1212:             $iauthority .= $this->ihost;
1213:         }
1214:         if ($this->port !== null)
1215:         {
1216:             $iauthority .= ':' . $this->port;
1217:         }
1218:         return $iauthority;
1219:     }
1220: 
1221:     /**
1222:      * Get the complete authority
1223:      *
1224:      * @return string|null The iauthority run through to_uri(), or null when
1225:      *                     get_iauthority() returns null
1226:      */
1227:     protected function get_authority()
1228:     {
1229:         $iauthority = $this->get_iauthority();
1230: 
1231:         // Only a string authority needs converting; null is passed through.
1232:         return is_string($iauthority) ? $this->to_uri($iauthority) : $iauthority;
1233:     }
1234: }
1235: 

Show some love! Wishlists for Geoffrey, Ryan P., and Ryan M.

SimplePie is © 2004–2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue and contributors. Licensed under the BSD License. Hosted thanks to Matt Mullenweg, API documentation generated by ApiGen 2.6.1. Variation on the Feed Icon by Wolfgang Bartelme.