123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232 |
- <?php
- namespace PhpParser\Lexer;
- use PhpParser\Parser;
- /**
- * ATTENTION: This code is WRITE-ONLY. Do not try to read it.
- */
- class Emulative extends \PhpParser\Lexer
- {
- protected $newKeywords;
- protected $inObjectAccess;
- const T_ELLIPSIS = 1001;
- const T_POW = 1002;
- const T_POW_EQUAL = 1003;
- const T_COALESCE = 1004;
- const T_SPACESHIP = 1005;
- const T_YIELD_FROM = 1006;
- const PHP_7_0 = '7.0.0dev';
- const PHP_5_6 = '5.6.0rc1';
- const PHP_5_5 = '5.5.0beta1';
- const PHP_5_4 = '5.4.0beta1';
- public function __construct(array $options = array()) {
- parent::__construct($options);
- $newKeywordsPerVersion = array(
- self::PHP_5_5 => array(
- 'finally' => Parser::T_FINALLY,
- 'yield' => Parser::T_YIELD,
- ),
- self::PHP_5_4 => array(
- 'callable' => Parser::T_CALLABLE,
- 'insteadof' => Parser::T_INSTEADOF,
- 'trait' => Parser::T_TRAIT,
- '__trait__' => Parser::T_TRAIT_C,
- ),
- );
- $this->newKeywords = array();
- foreach ($newKeywordsPerVersion as $version => $newKeywords) {
- if (version_compare(PHP_VERSION, $version, '>=')) {
- break;
- }
- $this->newKeywords += $newKeywords;
- }
- if (version_compare(PHP_VERSION, self::PHP_7_0, '>=')) {
- return;
- }
- $this->tokenMap[self::T_COALESCE] = Parser::T_COALESCE;
- $this->tokenMap[self::T_SPACESHIP] = Parser::T_SPACESHIP;
- $this->tokenMap[self::T_YIELD_FROM] = Parser::T_YIELD_FROM;
- if (version_compare(PHP_VERSION, self::PHP_5_6, '>=')) {
- return;
- }
- $this->tokenMap[self::T_ELLIPSIS] = Parser::T_ELLIPSIS;
- $this->tokenMap[self::T_POW] = Parser::T_POW;
- $this->tokenMap[self::T_POW_EQUAL] = Parser::T_POW_EQUAL;
- }
- public function startLexing($code) {
- $this->inObjectAccess = false;
- $preprocessedCode = $this->preprocessCode($code);
- parent::startLexing($preprocessedCode);
- if ($preprocessedCode !== $code) {
- $this->postprocessTokens();
- }
- // Set code property back to the original code, so __halt_compiler()
- // handling and (start|end)FilePos attributes use the correct offsets
- $this->code = $code;
- }
- /*
- * Replaces new features in the code by ~__EMU__{NAME}__{DATA}__~ sequences.
- * ~LABEL~ is never valid PHP code, that's why we can (to some degree) safely
- * use it here.
- * Later when preprocessing the tokens these sequences will either be replaced
- * by real tokens or replaced with their original content (e.g. if they occurred
- * inside a string, i.e. a place where they don't have a special meaning).
- */
- protected function preprocessCode($code) {
- if (version_compare(PHP_VERSION, self::PHP_7_0, '>=')) {
- return $code;
- }
- $code = str_replace('??', '~__EMU__COALESCE__~', $code);
- $code = str_replace('<=>', '~__EMU__SPACESHIP__~', $code);
- $code = preg_replace_callback('(yield[ \n\r\t]+from)', function($matches) {
- // Encoding $0 in order to preserve exact whitespace
- return '~__EMU__YIELDFROM__' . bin2hex($matches[0]) . '__~';
- }, $code);
- if (version_compare(PHP_VERSION, self::PHP_5_6, '>=')) {
- return $code;
- }
- $code = str_replace('...', '~__EMU__ELLIPSIS__~', $code);
- $code = preg_replace('((?<!/)\*\*=)', '~__EMU__POWEQUAL__~', $code);
- $code = preg_replace('((?<!/)\*\*(?!/))', '~__EMU__POW__~', $code);
- if (version_compare(PHP_VERSION, self::PHP_5_4, '>=')) {
- return $code;
- }
- // binary notation (0b010101101001...)
- return preg_replace('(\b0b[01]+\b)', '~__EMU__BINARY__$0__~', $code);
- }
- /*
- * Replaces the ~__EMU__...~ sequences with real tokens or their original
- * value.
- */
- protected function postprocessTokens() {
- // we need to manually iterate and manage a count because we'll change
- // the tokens array on the way
- for ($i = 0, $c = count($this->tokens); $i < $c; ++$i) {
- // first check that the following tokens are of form ~LABEL~,
- // then match the __EMU__... sequence.
- if ('~' === $this->tokens[$i]
- && isset($this->tokens[$i + 2])
- && '~' === $this->tokens[$i + 2]
- && T_STRING === $this->tokens[$i + 1][0]
- && preg_match('(^__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?$)', $this->tokens[$i + 1][1], $matches)
- ) {
- if ('BINARY' === $matches[1]) {
- // the binary number can either be an integer or a double, so return a LNUMBER
- // or DNUMBER respectively
- $isInt = is_int(bindec($matches[2]));
- $replace = array(
- array($isInt ? T_LNUMBER : T_DNUMBER, $matches[2], $this->tokens[$i + 1][2])
- );
- } else if ('ELLIPSIS' === $matches[1]) {
- $replace = array(
- array(self::T_ELLIPSIS, '...', $this->tokens[$i + 1][2])
- );
- } else if ('POW' === $matches[1]) {
- $replace = array(
- array(self::T_POW, '**', $this->tokens[$i + 1][2])
- );
- } else if ('POWEQUAL' === $matches[1]) {
- $replace = array(
- array(self::T_POW_EQUAL, '**=', $this->tokens[$i + 1][2])
- );
- } else if ('COALESCE' === $matches[1]) {
- $replace = array(
- array(self::T_COALESCE, '??', $this->tokens[$i + 1][2])
- );
- } else if ('SPACESHIP' === $matches[1]) {
- $replace = array(
- array(self::T_SPACESHIP, '<=>', $this->tokens[$i + 1][2]),
- );
- } else if ('YIELDFROM' === $matches[1]) {
- $content = $this->hex2bin($matches[2]);
- $replace = array(
- array(self::T_YIELD_FROM, $content, $this->tokens[$i + 1][2] - substr_count($content, "\n"))
- );
- } else {
- throw new \RuntimeException('Invalid __EMU__ sequence');
- }
- array_splice($this->tokens, $i, 3, $replace);
- $c -= 3 - count($replace);
- // for multichar tokens (e.g. strings) replace any ~__EMU__...~ sequences
- // in their content with the original character sequence
- } elseif (is_array($this->tokens[$i])
- && 0 !== strpos($this->tokens[$i][1], '__EMU__')
- ) {
- $this->tokens[$i][1] = preg_replace_callback(
- '(~__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?~)',
- array($this, 'restoreContentCallback'),
- $this->tokens[$i][1]
- );
- }
- }
- }
- /*
- * This method is a callback for restoring EMU sequences in
- * multichar tokens (like strings) to their original value.
- */
- public function restoreContentCallback(array $matches) {
- if ('BINARY' === $matches[1]) {
- return $matches[2];
- } else if ('ELLIPSIS' === $matches[1]) {
- return '...';
- } else if ('POW' === $matches[1]) {
- return '**';
- } else if ('POWEQUAL' === $matches[1]) {
- return '**=';
- } else if ('COALESCE' === $matches[1]) {
- return '??';
- } else if ('SPACESHIP' === $matches[1]) {
- return '<=>';
- } else if ('YIELDFROM' === $matches[1]) {
- return $this->hex2bin($matches[2]);
- } else {
- return $matches[0];
- }
- }
- private function hex2bin($str) {
- // TODO Drop when removing support for PHP 5.3
- return pack('H*', $str);
- }
- public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) {
- $token = parent::getNextToken($value, $startAttributes, $endAttributes);
- // replace new keywords by their respective tokens. This is not done
- // if we currently are in an object access (e.g. in $obj->namespace
- // "namespace" stays a T_STRING tokens and isn't converted to T_NAMESPACE)
- if (Parser::T_STRING === $token && !$this->inObjectAccess) {
- if (isset($this->newKeywords[strtolower($value)])) {
- return $this->newKeywords[strtolower($value)];
- }
- } else {
- // keep track of whether we currently are in an object access (after ->)
- $this->inObjectAccess = Parser::T_OBJECT_OPERATOR === $token;
- }
- return $token;
- }
- }
|