Emulative.php 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. <?php
  2. namespace PhpParser\Lexer;
  3. use PhpParser\Parser;
  4. /**
  5. * ATTENTION: This code is WRITE-ONLY. Do not try to read it.
  6. */
  class Emulative extends \PhpParser\Lexer
  {
      /**
       * Keywords that the running PHP version does not know yet.
       *
       * @var array Map of lowercase keyword => parser token id
       */
      protected $newKeywords;

      /**
       * Whether the previously returned token was "->" (see getNextToken()).
       *
       * @var bool
       */
      protected $inObjectAccess;

      // Ids for tokens that are created by this class during post-processing.
      // The constructor registers them in $this->tokenMap so that they resolve
      // to the corresponding Parser::T_* tokens.
      const T_ELLIPSIS   = 1001;
      const T_POW        = 1002;
      const T_POW_EQUAL  = 1003;
      const T_COALESCE   = 1004;
      const T_SPACESHIP  = 1005;
      const T_YIELD_FROM = 1006;

      // Version thresholds compared against PHP_VERSION to decide which
      // features still need emulation.
      const PHP_7_0 = '7.0.0dev';
      const PHP_5_6 = '5.6.0rc1';
      const PHP_5_5 = '5.5.0beta1';
      const PHP_5_4 = '5.4.0beta1';

      /**
       * Placeholders that carry no data: maps the {NAME} part of a
       * ~__EMU__{NAME}__~ sequence to array(emulated token id, original source text).
       *
       * Shared by postprocessTokens() and restoreContentCallback() so both
       * always agree on the mapping.
       *
       * @var array
       */
      private static $emulatedOperators = array(
          'ELLIPSIS'  => array(self::T_ELLIPSIS, '...'),
          'POW'       => array(self::T_POW, '**'),
          'POWEQUAL'  => array(self::T_POW_EQUAL, '**='),
          'COALESCE'  => array(self::T_COALESCE, '??'),
          'SPACESHIP' => array(self::T_SPACESHIP, '<=>'),
      );

      /**
       * Sets up the keyword and token maps required on the running PHP version.
       *
       * @param array $options Passed through unchanged to the parent lexer
       */
      public function __construct(array $options = array()) {
          parent::__construct($options);

          // Ordered newest-first: the loop below stops at the first version the
          // running PHP already satisfies, so only keywords of newer versions
          // are collected.
          $newKeywordsPerVersion = array(
              self::PHP_5_5 => array(
                  'finally' => Parser::T_FINALLY,
                  'yield'   => Parser::T_YIELD,
              ),
              self::PHP_5_4 => array(
                  'callable'  => Parser::T_CALLABLE,
                  'insteadof' => Parser::T_INSTEADOF,
                  'trait'     => Parser::T_TRAIT,
                  '__trait__' => Parser::T_TRAIT_C,
              ),
          );

          $this->newKeywords = array();
          foreach ($newKeywordsPerVersion as $version => $newKeywords) {
              if (version_compare(PHP_VERSION, $version, '>=')) {
                  break;
              }

              $this->newKeywords += $newKeywords;
          }

          // PHP >= 7.0: none of the tokens below need emulation.
          if (version_compare(PHP_VERSION, self::PHP_7_0, '>=')) {
              return;
          }

          $this->tokenMap[self::T_COALESCE]   = Parser::T_COALESCE;
          $this->tokenMap[self::T_SPACESHIP]  = Parser::T_SPACESHIP;
          $this->tokenMap[self::T_YIELD_FROM] = Parser::T_YIELD_FROM;

          // PHP >= 5.6: the remaining tokens do not need emulation.
          if (version_compare(PHP_VERSION, self::PHP_5_6, '>=')) {
              return;
          }

          $this->tokenMap[self::T_ELLIPSIS]  = Parser::T_ELLIPSIS;
          $this->tokenMap[self::T_POW]       = Parser::T_POW;
          $this->tokenMap[self::T_POW_EQUAL] = Parser::T_POW_EQUAL;
      }

      /**
       * Starts lexing the given code, emulating newer syntax where necessary.
       *
       * @param string $code Source code to lex
       */
      public function startLexing($code) {
          $this->inObjectAccess = false;

          $preprocessedCode = $this->preprocessCode($code);
          parent::startLexing($preprocessedCode);

          // Tokens only contain placeholders if preprocessing changed something.
          if ($preprocessedCode !== $code) {
              $this->postprocessTokens();
          }

          // Set code property back to the original code, so __halt_compiler()
          // handling and (start|end)FilePos attributes use the correct offsets
          $this->code = $code;
      }

      /*
       * Replaces new features in the code by ~__EMU__{NAME}__{DATA}__~ sequences.
       * ~LABEL~ is never valid PHP code, that's why we can (to some degree) safely
       * use it here.
       * Later, when postprocessing the tokens, these sequences will either be
       * replaced by real tokens or replaced with their original content (e.g. if
       * they occurred inside a string, i.e. a place where they don't have a
       * special meaning).
       *
       * Returns the code unchanged as soon as the running PHP version is new
       * enough to lex the remaining features itself.
       */
      protected function preprocessCode($code) {
          if (version_compare(PHP_VERSION, self::PHP_7_0, '>=')) {
              return $code;
          }

          $code = str_replace('??', '~__EMU__COALESCE__~', $code);
          $code = str_replace('<=>', '~__EMU__SPACESHIP__~', $code);
          // Keywords are case-insensitive in PHP, hence the "i" modifier. The
          // matched text is hex-encoded into the placeholder, so the original
          // casing and whitespace are restored exactly later on.
          $code = preg_replace_callback('(yield[ \n\r\t]+from)i', function($matches) {
              // Encoding $0 in order to preserve exact whitespace
              return '~__EMU__YIELDFROM__' . bin2hex($matches[0]) . '__~';
          }, $code);

          if (version_compare(PHP_VERSION, self::PHP_5_6, '>=')) {
              return $code;
          }

          $code = str_replace('...', '~__EMU__ELLIPSIS__~', $code);
          // The "/" guards avoid rewriting the "/**" and "**/" comment delimiters.
          $code = preg_replace('((?<!/)\*\*=)', '~__EMU__POWEQUAL__~', $code);
          $code = preg_replace('((?<!/)\*\*(?!/))', '~__EMU__POW__~', $code);

          if (version_compare(PHP_VERSION, self::PHP_5_4, '>=')) {
              return $code;
          }

          // binary notation (0b010101101001...)
          return preg_replace('(\b0b[01]+\b)', '~__EMU__BINARY__$0__~', $code);
      }

      /*
       * Replaces the ~__EMU__...~ sequences with real tokens or their original
       * value.
       *
       * Throws a RuntimeException if a ~__EMU__{NAME}__~ token triple has an
       * unknown name or lacks the data its name requires.
       */
      protected function postprocessTokens() {
          // we need to manually iterate and manage a count because we'll change
          // the tokens array on the way
          for ($i = 0, $c = count($this->tokens); $i < $c; ++$i) {
              // first check that the following tokens are of form ~LABEL~,
              // then match the __EMU__... sequence.
              if ('~' === $this->tokens[$i]
                  && isset($this->tokens[$i + 2])
                  && '~' === $this->tokens[$i + 2]
                  && T_STRING === $this->tokens[$i + 1][0]
                  && preg_match('(^__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?$)', $this->tokens[$i + 1][1], $matches)
              ) {
                  $name = $matches[1];
                  $line = $this->tokens[$i + 1][2];

                  if (isset(self::$emulatedOperators[$name])) {
                      $operator = self::$emulatedOperators[$name];
                      $replace = array(
                          array($operator[0], $operator[1], $line)
                      );
                  } elseif ('BINARY' === $name && isset($matches[2])) {
                      // the binary number can either be an integer or a double, so return a LNUMBER
                      // or DNUMBER respectively
                      $isInt = is_int(bindec($matches[2]));
                      $replace = array(
                          array($isInt ? T_LNUMBER : T_DNUMBER, $matches[2], $line)
                      );
                  } elseif ('YIELDFROM' === $name && isset($matches[2])) {
                      $content = $this->hex2bin($matches[2]);
                      // Line is the placeholder's line minus the number of
                      // newlines contained in the restored "yield from" text.
                      $replace = array(
                          array(self::T_YIELD_FROM, $content, $line - substr_count($content, "\n"))
                      );
                  } else {
                      // Unknown name, or a data-carrying name without its data.
                      throw new \RuntimeException('Invalid __EMU__ sequence');
                  }

                  array_splice($this->tokens, $i, 3, $replace);
                  $c -= 3 - count($replace);
              // for multichar tokens (e.g. strings) replace any ~__EMU__...~ sequences
              // in their content with the original character sequence
              } elseif (is_array($this->tokens[$i])
                  // "false !==" (contains), not "0 !==" (does not start with):
                  // a token may start with __EMU__ and still hold a placeholder.
                  && false !== strpos($this->tokens[$i][1], '~__EMU__')
              ) {
                  $this->tokens[$i][1] = preg_replace_callback(
                      '(~__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?~)',
                      array($this, 'restoreContentCallback'),
                      $this->tokens[$i][1]
                  );
              }
          }
      }

      /*
       * This method is a callback for restoring EMU sequences in
       * multichar tokens (like strings) to their original value.
       *
       * $matches is the match array of the placeholder regex: [0] is the whole
       * sequence, [1] the name and [2] (optional) the data. Sequences that are
       * not recognized are returned unchanged.
       */
      public function restoreContentCallback(array $matches) {
          $name = $matches[1];

          if (isset(self::$emulatedOperators[$name])) {
              return self::$emulatedOperators[$name][1];
          }

          // The remaining placeholders are only meaningful with a data part.
          if (isset($matches[2])) {
              if ('BINARY' === $name) {
                  return $matches[2];
              }

              if ('YIELDFROM' === $name) {
                  return $this->hex2bin($matches[2]);
              }
          }

          return $matches[0];
      }

      /**
       * Decodes a hexadecimal string into its binary representation.
       *
       * @param string $str Hex-encoded string
       *
       * @return string Decoded string
       */
      private function hex2bin($str) {
          // TODO Drop when removing support for PHP 5.3
          return pack('H*', $str);
      }

      /**
       * Fetches the next token from the parent lexer and converts emulated
       * keywords to their parser tokens.
       *
       * @param mixed $value           Receives the token value (filled by the parent lexer)
       * @param mixed $startAttributes Receives the start attributes (filled by the parent lexer)
       * @param mixed $endAttributes   Receives the end attributes (filled by the parent lexer)
       *
       * @return int Parser token id
       */
      public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) {
          $token = parent::getNextToken($value, $startAttributes, $endAttributes);

          // replace new keywords by their respective tokens. This is not done
          // if we currently are in an object access (e.g. in $obj->namespace
          // "namespace" stays a T_STRING token and isn't converted to T_NAMESPACE)
          if (Parser::T_STRING === $token && !$this->inObjectAccess) {
              $keyword = strtolower($value);
              if (isset($this->newKeywords[$keyword])) {
                  return $this->newKeywords[$keyword];
              }
          } else {
              // keep track of whether we currently are in an object access (after ->)
              $this->inObjectAccess = Parser::T_OBJECT_OPERATOR === $token;
          }

          return $token;
      }
  }