PercentEncoder.php 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. <?php
  2. /**
  3. * Class that handles operations involving percent-encoding in URIs.
  4. *
  5. * @warning
  6. * Be careful when reusing instances of PercentEncoder. The object
  7. * you use for normalize() SHOULD NOT be used for encode(), or
  8. * vice-versa.
  9. */
  10. class HTMLPurifier_PercentEncoder
  11. {
  12. /**
  13. * Reserved characters to preserve when using encode().
  14. * @type array
  15. */
  16. protected $preserve = array();
  17. /**
  18. * String of characters that should be preserved while using encode().
  19. * @param bool $preserve
  20. */
  21. public function __construct($preserve = false)
  22. {
  23. // unreserved letters, ought to const-ify
  24. for ($i = 48; $i <= 57; $i++) { // digits
  25. $this->preserve[$i] = true;
  26. }
  27. for ($i = 65; $i <= 90; $i++) { // upper-case
  28. $this->preserve[$i] = true;
  29. }
  30. for ($i = 97; $i <= 122; $i++) { // lower-case
  31. $this->preserve[$i] = true;
  32. }
  33. $this->preserve[45] = true; // Dash -
  34. $this->preserve[46] = true; // Period .
  35. $this->preserve[95] = true; // Underscore _
  36. $this->preserve[126]= true; // Tilde ~
  37. // extra letters not to escape
  38. if ($preserve !== false) {
  39. for ($i = 0, $c = strlen($preserve); $i < $c; $i++) {
  40. $this->preserve[ord($preserve[$i])] = true;
  41. }
  42. }
  43. }
  44. /**
  45. * Our replacement for urlencode, it encodes all non-reserved characters,
  46. * as well as any extra characters that were instructed to be preserved.
  47. * @note
  48. * Assumes that the string has already been normalized, making any
  49. * and all percent escape sequences valid. Percents will not be
  50. * re-escaped, regardless of their status in $preserve
  51. * @param string $string String to be encoded
  52. * @return string Encoded string.
  53. */
  54. public function encode($string)
  55. {
  56. $ret = '';
  57. for ($i = 0, $c = strlen($string); $i < $c; $i++) {
  58. if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])])) {
  59. $ret .= '%' . sprintf('%02X', $int);
  60. } else {
  61. $ret .= $string[$i];
  62. }
  63. }
  64. return $ret;
  65. }
  66. /**
  67. * Fix up percent-encoding by decoding unreserved characters and normalizing.
  68. * @warning This function is affected by $preserve, even though the
  69. * usual desired behavior is for this not to preserve those
  70. * characters. Be careful when reusing instances of PercentEncoder!
  71. * @param string $string String to normalize
  72. * @return string
  73. */
  74. public function normalize($string)
  75. {
  76. if ($string == '') {
  77. return '';
  78. }
  79. $parts = explode('%', $string);
  80. $ret = array_shift($parts);
  81. foreach ($parts as $part) {
  82. $length = strlen($part);
  83. if ($length < 2) {
  84. $ret .= '%25' . $part;
  85. continue;
  86. }
  87. $encoding = substr($part, 0, 2);
  88. $text = substr($part, 2);
  89. if (!ctype_xdigit($encoding)) {
  90. $ret .= '%25' . $part;
  91. continue;
  92. }
  93. $int = hexdec($encoding);
  94. if (isset($this->preserve[$int])) {
  95. $ret .= chr($int) . $text;
  96. continue;
  97. }
  98. $encoding = strtoupper($encoding);
  99. $ret .= '%' . $encoding . $text;
  100. }
  101. return $ret;
  102. }
  103. }
  104. // vim: et sw=4 sts=4