rebuildParser.php 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. <?php
  2. $grammarFile = __DIR__ . '/zend_language_parser.phpy';
  3. $skeletonFile = __DIR__ . '/kmyacc.php.parser';
  4. $tmpGrammarFile = __DIR__ . '/tmp_parser.phpy';
  5. $tmpResultFile = __DIR__ . '/tmp_parser.php';
  6. $parserResultFile = __DIR__ . '/../lib/PhpParser/Parser.php';
  7. // check for kmyacc.exe binary in this directory, otherwise fall back to global name
  8. $kmyacc = __DIR__ . '/kmyacc.exe';
  9. if (!file_exists($kmyacc)) {
  10. $kmyacc = 'kmyacc';
  11. }
  12. $options = array_flip($argv);
  13. $optionDebug = isset($options['--debug']);
  14. $optionKeepTmpGrammar = isset($options['--keep-tmp-grammar']);
  15. ///////////////////////////////
  16. /// Utility regex constants ///
  17. ///////////////////////////////
// Library of named PCRE subpatterns. Everything sits inside a (?(DEFINE)...)
// group, so this text only declares the groups singleQuotedString,
// doubleQuotedString, string, comment and code. regex() prepends it to a
// pattern so that those groups can be invoked there via (?&name).
const LIB = '(?(DEFINE)
(?<singleQuotedString>\'[^\\\\\']*+(?:\\\\.[^\\\\\']*+)*+\')
(?<doubleQuotedString>"[^\\\\"]*+(?:\\\\.[^\\\\"]*+)*+")
(?<string>(?&singleQuotedString)|(?&doubleQuotedString))
(?<comment>/\*[^*]*+(?:\*(?!/)[^*]*+)*+\*/)
(?<code>\{[^\'"/{}]*+(?:(?:(?&string)|(?&comment)|(?&code)|/)[^\'"/{}]*+)*+})
)';
// Matches a bracketed list "[...]"; nested brackets are handled by recursing
// into (?&params). The text between the outer brackets is captured as "params".
const PARAMS = '\[(?<params>[^[\]]*+(?:\[(?&params)\][^[\]]*+)*+)\]';
// Matches a parenthesised list "(...)"; nested parentheses are handled by
// recursing into (?&args). The text between the outer parentheses is captured
// as "args".
const ARGS = '\((?<args>[^()]*+(?:\((?&args)\)[^()]*+)*+)\)';
  27. ///////////////////
  28. /// Main script ///
  29. ///////////////////
  30. echo 'Building temporary preproprocessed grammar file.', "\n";
  31. $grammarCode = file_get_contents($grammarFile);
  32. $grammarCode = resolveNodes($grammarCode);
  33. $grammarCode = resolveMacros($grammarCode);
  34. $grammarCode = resolveArrays($grammarCode);
  35. $grammarCode = resolveStackAccess($grammarCode);
  36. file_put_contents($tmpGrammarFile, $grammarCode);
  37. $additionalArgs = $optionDebug ? '-t -v' : '';
  38. echo "Building parser.\n";
  39. $output = trim(shell_exec("$kmyacc $additionalArgs -l -m $skeletonFile $tmpGrammarFile 2>&1"));
  40. echo "Output: \"$output\"\n";
  41. $resultCode = file_get_contents($tmpResultFile);
  42. $resultCode = removeTrailingWhitespace($resultCode);
  43. ensureDirExists(dirname($parserResultFile));
  44. file_put_contents($parserResultFile, $resultCode);
  45. unlink($tmpResultFile);
  46. if (!$optionKeepTmpGrammar) {
  47. unlink($tmpGrammarFile);
  48. }
  49. ///////////////////////////////
  50. /// Preprocessing functions ///
  51. ///////////////////////////////
  52. function resolveNodes($code) {
  53. return preg_replace_callback(
  54. '~(?<name>[A-Z][a-zA-Z_\\\\]++)\s*' . PARAMS . '~',
  55. function($matches) {
  56. // recurse
  57. $matches['params'] = resolveNodes($matches['params']);
  58. $params = magicSplit(
  59. '(?:' . PARAMS . '|' . ARGS . ')(*SKIP)(*FAIL)|,',
  60. $matches['params']
  61. );
  62. $paramCode = '';
  63. foreach ($params as $param) {
  64. $paramCode .= $param . ', ';
  65. }
  66. return 'new ' . $matches['name'] . '(' . $paramCode . 'attributes())';
  67. },
  68. $code
  69. );
  70. }
  71. function resolveMacros($code) {
  72. return preg_replace_callback(
  73. '~\b(?<!::|->)(?!array\()(?<name>[a-z][A-Za-z]++)' . ARGS . '~',
  74. function($matches) {
  75. // recurse
  76. $matches['args'] = resolveMacros($matches['args']);
  77. $name = $matches['name'];
  78. $args = magicSplit(
  79. '(?:' . PARAMS . '|' . ARGS . ')(*SKIP)(*FAIL)|,',
  80. $matches['args']
  81. );
  82. if ('attributes' == $name) {
  83. assertArgs(0, $args, $name);
  84. return '$this->startAttributeStack[#1] + $this->endAttributes';
  85. }
  86. if ('init' == $name) {
  87. return '$$ = array(' . implode(', ', $args) . ')';
  88. }
  89. if ('push' == $name) {
  90. assertArgs(2, $args, $name);
  91. return $args[0] . '[] = ' . $args[1] . '; $$ = ' . $args[0];
  92. }
  93. if ('pushNormalizing' == $name) {
  94. assertArgs(2, $args, $name);
  95. return 'if (is_array(' . $args[1] . ')) { $$ = array_merge(' . $args[0] . ', ' . $args[1] . '); } else { ' . $args[0] . '[] = ' . $args[1] . '; $$ = ' . $args[0] . '; }';
  96. }
  97. if ('toArray' == $name) {
  98. assertArgs(1, $args, $name);
  99. return 'is_array(' . $args[0] . ') ? ' . $args[0] . ' : array(' . $args[0] . ')';
  100. }
  101. if ('parseVar' == $name) {
  102. assertArgs(1, $args, $name);
  103. return 'substr(' . $args[0] . ', 1)';
  104. }
  105. if ('parseEncapsed' == $name) {
  106. assertArgs(2, $args, $name);
  107. return 'foreach (' . $args[0] . ' as &$s) { if (is_string($s)) { $s = Node\Scalar\String_::parseEscapeSequences($s, ' . $args[1] . '); } }';
  108. }
  109. if ('parseEncapsedDoc' == $name) {
  110. assertArgs(1, $args, $name);
  111. return 'foreach (' . $args[0] . ' as &$s) { if (is_string($s)) { $s = Node\Scalar\String_::parseEscapeSequences($s, null); } } $s = preg_replace(\'~(\r\n|\n|\r)\z~\', \'\', $s); if (\'\' === $s) array_pop(' . $args[0] . ');';
  112. }
  113. return $matches[0];
  114. },
  115. $code
  116. );
  117. }
  118. function assertArgs($num, $args, $name) {
  119. if ($num != count($args)) {
  120. die('Wrong argument count for ' . $name . '().');
  121. }
  122. }
  123. function resolveArrays($code) {
  124. return preg_replace_callback(
  125. '~' . PARAMS . '~',
  126. function ($matches) {
  127. $elements = magicSplit(
  128. '(?:' . PARAMS . '|' . ARGS . ')(*SKIP)(*FAIL)|,',
  129. $matches['params']
  130. );
  131. // don't convert [] to array, it might have different meaning
  132. if (empty($elements)) {
  133. return $matches[0];
  134. }
  135. $elementCodes = array();
  136. foreach ($elements as $element) {
  137. // convert only arrays where all elements have keys
  138. if (false === strpos($element, ':')) {
  139. return $matches[0];
  140. }
  141. list($key, $value) = explode(':', $element, 2);
  142. $elementCodes[] = "'" . $key . "' =>" . $value;
  143. }
  144. return 'array(' . implode(', ', $elementCodes) . ')';
  145. },
  146. $code
  147. );
  148. }
  149. function resolveStackAccess($code) {
  150. $code = preg_replace('/\$\d+/', '$this->semStack[$0]', $code);
  151. $code = preg_replace('/#(\d+)/', '$$1', $code);
  152. return $code;
  153. }
  154. function removeTrailingWhitespace($code) {
  155. $lines = explode("\n", $code);
  156. $lines = array_map('rtrim', $lines);
  157. return implode("\n", $lines);
  158. }
  159. function ensureDirExists($dir) {
  160. if (!is_dir($dir)) {
  161. mkdir($dir, 0777, true);
  162. }
  163. }
  164. //////////////////////////////
  165. /// Regex helper functions ///
  166. //////////////////////////////
  167. function regex($regex) {
  168. return '~' . LIB . '(?:' . str_replace('~', '\~', $regex) . ')~';
  169. }
  170. function magicSplit($regex, $string) {
  171. $pieces = preg_split(regex('(?:(?&string)|(?&comment)|(?&code))(*SKIP)(*FAIL)|' . $regex), $string);
  172. foreach ($pieces as &$piece) {
  173. $piece = trim($piece);
  174. }
  175. return array_filter($pieces);
  176. }