index.js 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. 'use strict';
  2. var objIsRegex = require('is-regex');
  3. exports = (module.exports = parse);
  // Markers that identify which kind of token (comment, string, template
  // literal or regexp) the parser is currently inside. Exposed on the module
  // as `TOKEN_TYPES`.
  var TOKEN_TYPES = {
    LINE_COMMENT: '//',
    BLOCK_COMMENT: '/**/',
    SINGLE_QUOTE: '\'',
    DOUBLE_QUOTE: '"',
    TEMPLATE_QUOTE: '`',
    REGEXP: '//g'
  };
  exports.TOKEN_TYPES = TOKEN_TYPES;
  // Opening bracket -> the closing bracket that must eventually match it.
  // Exposed on the module as `BRACKETS`.
  var BRACKETS = {
    '(': ')',
    '{': '}',
    '[': ']'
  };
  exports.BRACKETS = BRACKETS;
  // Closing bracket -> the opening bracket it pairs with (module-private).
  var BRACKETS_REVERSED = {};
  BRACKETS_REVERSED[')'] = '(';
  BRACKETS_REVERSED['}'] = '{';
  BRACKETS_REVERSED[']'] = '[';
  22. exports.parse = parse;
  /**
   * Feed a range of `src` through `parseChar`, one character at a time, and
   * return the resulting state.
   *
   * @param {string} src - Text to scan.
   * @param {Object} [state] - State to continue from (updated in place). When
   *   omitted, a fresh one is obtained from `exports.defaultState()`.
   * @param {Object} [options]
   * @param {number} [options.start=0] - Index of the first character to parse.
   * @param {number} [options.end=src.length] - Index to stop at (exclusive).
   *   An explicit `0` is honoured and parses nothing.
   * @returns {Object} The state that was passed in or created.
   * @throws Whatever `parseChar` throws; the error is rethrown after an
   *   `index` property holding the failing position has been attached.
   */
  function parse(src, state, options) {
    options = options || {};
    state = state || exports.defaultState();
    var start = options.start || 0;
    // `options.end || src.length` would silently turn `end: 0` into "parse
    // everything", so only fall back when no numeric end was supplied.
    var end = typeof options.end === 'number' ? options.end : src.length;
    for (var index = start; index < end; index++) {
      try {
        parseChar(src[index], state);
      } catch (ex) {
        ex.index = index;
        throw ex;
      }
    }
    return state;
  }
  40. exports.parseUntil = parseUntil;
  /**
   * Scan `src` from `options.start` until `delimiter` matches at the current
   * position, and return the section that precedes it.
   *
   * A match is only accepted while the parser state reports no nesting
   * (`state.isNesting(options)` is false), unless `options.ignoreNesting` is
   * set, in which case the first match wins.
   *
   * @param {string} src - Text to scan.
   * @param {string|RegExp} delimiter - Passed to `matches` at every position.
   * @param {Object} [options]
   * @param {number} [options.start=0] - Index to begin scanning at.
   * @param {boolean} [options.ignoreNesting] - Accept a match at any depth.
   * @returns {{start: number, end: number, src: string}} The bounds of the
   *   section and its text (`end` is the index where the delimiter matched).
   * @throws {Error} With code `CHARACTER_PARSER:END_OF_STRING_REACHED` when
   *   the delimiter is never matched. Errors from `parseChar` are rethrown
   *   with an `index` property attached.
   */
  function parseUntil(src, delimiter, options) {
    options = options || {};
    var begin = options.start || 0;
    var state = exports.defaultState();
    var pos;

    for (pos = begin; pos < src.length; pos++) {
      var mayMatchHere = options.ignoreNesting || !state.isNesting(options);
      if (mayMatchHere && matches(src, delimiter, pos)) {
        return {
          start: begin,
          end: pos,
          src: src.substring(begin, pos)
        };
      }

      try {
        parseChar(src[pos], state);
      } catch (ex) {
        ex.index = pos;
        throw ex;
      }
    }

    var failure = new Error('The end of the string was reached with no closing bracket found.');
    failure.code = 'CHARACTER_PARSER:END_OF_STRING_REACHED';
    failure.index = pos;
    throw failure;
  }
  68. exports.parseChar = parseChar;
  /**
   * Advance the parser state by a single character.
   *
   * The entry on top of `state.stack` decides how the character is read:
   * inside a comment, string, template literal or regexp only the matching
   * terminator (and backslash escapes) are significant; anywhere else,
   * brackets, quotes and comment/regexp openers push a new entry.
   *
   * @param {string} character - Exactly one character.
   * @param {Object} [state] - State to update in place. When omitted, a fresh
   *   one is obtained from `exports.defaultState()`.
   * @returns {Object} The updated state.
   * @throws {Error} Named `InvalidArgumentError`, with code
   *   `CHARACTER_PARSER:CHAR_LENGTH_NOT_ONE`, when `character.length !== 1`.
   * @throws {SyntaxError} With code `CHARACTER_PARSER:MISMATCHED_BRACKET` when
   *   a closing bracket does not match the innermost open bracket.
   */
  function parseChar(character, state) {
    var err;
    if (character.length !== 1) {
      err = new Error('Character must be a string of length 1');
      err.name = 'InvalidArgumentError';
      err.code = 'CHARACTER_PARSER:CHAR_LENGTH_NOT_ONE';
      throw err;
    }

    state = state || exports.defaultState();
    state.src += character;
    var wasComment = state.isComment();
    // `history` is stored newest-first, so index 0 is the previous character.
    var lastChar = state.history ? state.history[0] : '';

    if (state.regexpStart) {
      // The previous `/` was provisionally pushed as a regexp. A `/` or `*`
      // right after it means it was really a comment opener: undo the push.
      if (character === '/' || character === '*') {
        state.stack.pop();
      }
      state.regexpStart = false;
    }

    switch (state.current()) {
      case TOKEN_TYPES.LINE_COMMENT:
        if (character === '\n') {
          state.stack.pop();
        }
        break;
      case TOKEN_TYPES.BLOCK_COMMENT:
        if (state.lastChar === '*' && character === '/') {
          state.stack.pop();
        }
        break;
      case TOKEN_TYPES.SINGLE_QUOTE:
        if (character === '\'' && !state.escaped) {
          state.stack.pop();
        } else if (character === '\\' && !state.escaped) {
          state.escaped = true;
        } else {
          state.escaped = false;
        }
        break;
      case TOKEN_TYPES.DOUBLE_QUOTE:
        if (character === '"' && !state.escaped) {
          state.stack.pop();
        } else if (character === '\\' && !state.escaped) {
          state.escaped = true;
        } else {
          state.escaped = false;
        }
        break;
      case TOKEN_TYPES.TEMPLATE_QUOTE:
        if (character === '`' && !state.escaped) {
          state.stack.pop();
          state.hasDollar = false;
        } else if (character === '\\' && !state.escaped) {
          state.escaped = true;
          state.hasDollar = false;
        } else if (character === '$' && !state.escaped) {
          state.hasDollar = true;
        } else if (character === '{' && state.hasDollar) {
          // `${` opens an interpolation. Clear the flag now; otherwise it is
          // still set when the interpolation closes and a literal `{` right
          // after the `}` would be pushed as another interpolation.
          state.hasDollar = false;
          state.stack.push(BRACKETS[character]);
        } else {
          state.escaped = false;
          state.hasDollar = false;
        }
        break;
      case TOKEN_TYPES.REGEXP:
        if (character === '/' && !state.escaped) {
          state.stack.pop();
        } else if (character === '\\' && !state.escaped) {
          state.escaped = true;
        } else {
          state.escaped = false;
        }
        break;
      default:
        // Plain code: either the stack is empty or its top is a pending
        // closing bracket.
        if (character in BRACKETS) {
          state.stack.push(BRACKETS[character]);
        } else if (character in BRACKETS_REVERSED) {
          if (state.current() !== character) {
            err = new SyntaxError('Mismatched Bracket: ' + character);
            err.code = 'CHARACTER_PARSER:MISMATCHED_BRACKET';
            throw err;
          }
          state.stack.pop();
        } else if (lastChar === '/' && character === '/') {
          // Don't include comments in history
          state.history = state.history.substr(1);
          state.stack.push(TOKEN_TYPES.LINE_COMMENT);
        } else if (lastChar === '/' && character === '*') {
          // Don't include comment in history
          state.history = state.history.substr(1);
          state.stack.push(TOKEN_TYPES.BLOCK_COMMENT);
        } else if (character === '/' && isRegexp(state.history)) {
          state.stack.push(TOKEN_TYPES.REGEXP);
          // N.B. if the next character turns out to be a `*` or a `/`
          // then this isn't actually a regexp
          state.regexpStart = true;
        } else if (character === '\'') {
          state.stack.push(TOKEN_TYPES.SINGLE_QUOTE);
        } else if (character === '"') {
          state.stack.push(TOKEN_TYPES.DOUBLE_QUOTE);
        } else if (character === '`') {
          state.stack.push(TOKEN_TYPES.TEMPLATE_QUOTE);
        }
        break;
    }

    // Comment text (including the characters that open and close it) is kept
    // out of `history`.
    if (!state.isComment() && !wasComment) {
      state.history = character + state.history;
    }
    state.lastChar = character; // store last character for ending block comments
    return state;
  }
  178. exports.defaultState = function () { return new State() };
  /**
   * Mutable parser state.
   *
   * Fields, as initialised here:
   *  - stack:       open contexts, innermost last
   *  - regexpStart: flag, initially false
   *  - escaped:     flag, initially false
   *  - hasDollar:   flag, initially false
   *  - src:         string accumulator, initially empty
   *  - history:     string accumulator, initially empty
   *  - lastChar:    string, initially empty
   */
  function State() {
    this.stack = [];
    this.regexpStart = false;
    this.escaped = false;
    this.hasDollar = false;
    this.src = '';
    this.history = '';
    this.lastChar = '';
  }

  /** @returns {string|undefined} The innermost stack entry, or undefined when the stack is empty. */
  State.prototype.current = function () {
    var depth = this.stack.length;
    return this.stack[depth - 1];
  };

  /** @returns {boolean} Whether the innermost entry is a single-quote, double-quote or template-quote marker. */
  State.prototype.isString = function () {
    if (this.current() === TOKEN_TYPES.SINGLE_QUOTE) {
      return true;
    }
    if (this.current() === TOKEN_TYPES.DOUBLE_QUOTE) {
      return true;
    }
    return this.current() === TOKEN_TYPES.TEMPLATE_QUOTE;
  };

  /** @returns {boolean} Whether the innermost entry is a line-comment or block-comment marker. */
  State.prototype.isComment = function () {
    if (this.current() === TOKEN_TYPES.LINE_COMMENT) {
      return true;
    }
    return this.current() === TOKEN_TYPES.BLOCK_COMMENT;
  };

  /**
   * @param {Object} [opts]
   * @param {boolean} [opts.ignoreLineComment] - When truthy, a stack holding
   *   nothing but a line-comment marker does not count as nesting.
   * @returns {boolean} Whether anything is currently open on the stack.
   */
  State.prototype.isNesting = function (opts) {
    var skipLineComment = opts && opts.ignoreLineComment;
    if (skipLineComment && this.stack.length === 1 && this.stack[0] === TOKEN_TYPES.LINE_COMMENT) {
      // Only a line comment is open and the caller asked to ignore those.
      return false;
    }
    return Boolean(this.stack.length);
  };
  /**
   * Test whether `matcher` matches `str` at offset `i`.
   *
   * A string matcher must appear literally at offset `i`. A RegExp matcher is
   * tested against the remainder of `str` starting at `i`, so a pattern that
   * is not anchored with `^` can also succeed on a match later in the text.
   *
   * @param {string} str - Text to look in.
   * @param {string|RegExp} matcher - Literal text or pattern to look for.
   * @param {number} [i=0] - Offset into `str`.
   * @returns {boolean} Whether the matcher matched.
   */
  function matches(str, matcher, i) {
    if (objIsRegex(matcher)) {
      // A global or sticky regex resumes from `lastIndex`, which a previous
      // successful `test` leaves non-zero. Reset it so every call starts at
      // the beginning of the substring being examined.
      matcher.lastIndex = 0;
      return matcher.test(str.substr(i || 0));
    } else {
      return str.substr(i || 0, matcher.length) === matcher;
    }
  }
  219. exports.isPunctuator = isPunctuator
  /**
   * Report whether the first character of `c` is one of the punctuation
   * characters in the lookup list below.
   *
   * @param {string} [c] - Character to classify. A missing or empty value is
   *   treated as punctuation, because it stands for the start of the input.
   * @returns {boolean}
   */
  function isPunctuator(c) {
    // The start of a string counts as a punctuator.
    if (!c) {
      return true;
    }
    // Every character the original switch accepted, in the same order.
    var punctuation = '.();,{}[]:?~%&*+-/<>^|!=';
    var first = c.charAt(0);
    return punctuation.indexOf(first) !== -1;
  }
  253. exports.isKeyword = isKeyword
  /**
   * Report whether `id` is exactly one of the reserved words listed below.
   *
   * @param {*} id - Candidate identifier (compared with strict equality).
   * @returns {boolean}
   */
  function isKeyword(id) {
    switch (id) {
      case 'if':
      case 'in':
      case 'do':
      case 'var':
      case 'for':
      case 'new':
      case 'try':
      case 'let':
      case 'this':
      case 'else':
      case 'case':
      case 'void':
      case 'with':
      case 'enum':
      case 'while':
      case 'break':
      case 'catch':
      case 'throw':
      case 'const':
      case 'yield':
      case 'class':
      case 'super':
      case 'return':
      case 'typeof':
      case 'delete':
      case 'switch':
      case 'export':
      case 'import':
      case 'default':
      case 'finally':
      case 'extends':
      case 'function':
      case 'continue':
      case 'debugger':
      case 'package':
      case 'private':
      case 'interface':
      case 'instanceof':
      case 'implements':
      case 'protected':
      case 'public':
      case 'static':
        return true;
      default:
        return false;
    }
  }
  /**
   * Guess whether a `/` seen now starts a regular expression rather than a
   * division, based on what came before it.
   *
   * @param {string} history - Preceding non-comment characters, newest first
   *   (the same order `state.history` is built in).
   * @returns {boolean} true when the `/` should be read as a regexp opener.
   */
  function isRegexp(history) {
    // Skip whitespace directly before the slash.
    var recent = history.replace(/^\s*/, '');
    var newest = recent[0];

    // After `)` it is a divide unless the parens belong to `if`, `while`,
    // `for` or `with`; assume divide.
    if (newest === ')') {
      return false;
    }
    // After `}` it is a regexp unless the braces closed a function
    // expression; assume regexp.
    if (newest === '}') {
      return true;
    }
    // Any other punctuation (or nothing at all) means a regexp.
    if (isPunctuator(newest)) {
      return true;
    }
    // A preceding keyword also means a regexp (e.g. `typeof /foo/`). The word
    // is read out of the reversed history, so flip it back before the lookup.
    var word = /^\w+\b/.exec(recent);
    if (word !== null) {
      var identifier = word[0].split('').reverse().join('');
      if (isKeyword(identifier)) {
        return true;
      }
    }
    return false;
  }