parser.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. //@+leo-ver=5-thin
  2. //@+node:caminhante.20210807141115.1: * @file parser.js
  3. //@@language javascript
  4. //@@tabwidth -2
  5. //@+others
  6. //@+node:caminhante.20210815184051.1: ** /funções auxiliares
  7. //@+node:caminhante.20210815184057.1: *3* typeConstraint
  // Verifies that `obj` satisfies `type`; throws a descriptive string otherwise.
  //   obj    - the value under test
  //   argpos - position of `obj` in the caller's argument list (used only in the message)
  //   type   - 'string'  : string primitive or String object
  //            'integer' : finite number primitive without fractional part
  //            'number'  : anything that is not NaN and that parseFloat can read
  //            'bool'    : boolean primitive, or undefined (so the argument may be omitted)
  //            function  : predicate; the constraint holds when it returns a truthy value
  // Throws the string "Type constraint <type> violated at <caller>->arg(<argpos>)".
  function typeConstraint (obj, argpos, type) {
    // The caller's name is only decoration for the error text. arguments.callee is
    // forbidden in strict mode / ES modules and .caller is null for top-level calls,
    // so the lookup is best-effort and must never prevent the actual check.
    var cn = '<unknown>'
    try {
      var caller = arguments.callee.caller
      if (caller) { cn = caller.name }
    } catch (e) {
      // deliberate: keep the placeholder name when the lookup is not permitted
    }
    function error (c) {
      throw "Type constraint "+c+" violated at "+cn+"->arg("+argpos+")"
    }
    switch (type) {
      case 'string':
        if (typeof(obj) !== 'string' && !(obj instanceof String)) { error(type) }
        break
      case 'integer':
        // Math.floor instead of (obj | 0): the bitwise form truncates to 32 bits and
        // therefore rejected perfectly valid integers such as 2^32
        if (typeof(obj) !== 'number' || !isFinite(obj) || Math.floor(obj) !== obj) { error(type) }
        break
      case 'number':
        if (isNaN(obj) || isNaN(parseFloat(obj))) { error(type) }
        break
      case 'bool':
        if (typeof(obj) !== 'undefined' && typeof(obj) !== 'boolean') { error(type) }
        break
      default:
        if (!type(obj)) { error(type.name) }
    }
  }
  21. //@+node:caminhante.20210813002512.1: *3* atEOS
  22. // cursor at the end of source code
  // True when the cursor of `src` sits on the final character of its source text,
  // i.e. the (1-based) position equals the source length.
  function atEOS (src) {
    typeConstraint(src,1,Type_SourceCode)
    var cursor = src.pos()
    var total = src.source().length
    return cursor == total
  }
  27. //@+node:caminhante.20210904195538.1: *3* last
  // Returns the final element of an indexable value (undefined when it is empty).
  function last (v) {
    var tailIndex = v.length - 1
    return v[tailIndex]
  }
  31. //@+node:caminhante.20210807141136.1: ** /interfaces
  32. //@+node:caminhante.20210807141201.1: *3* Type_SourceCode
  // Structural check for the SourceCode interface: answers true when `it` exposes
  // truthy `source`, `pos`, `line` and `column` members, false otherwise.
  function Type_SourceCode (it) {
    // null/undefined have no members at all; answer false so that typeConstraint can
    // report a proper constraint violation instead of a TypeError escaping from here
    if (it === null || it === undefined) { return false }
    return Boolean(it.source && it.pos && it.line && it.column)
  }
  36. //@+node:caminhante.20210807141204.1: *3* Type_SourceCodeToken
  // Structural check for the SourceCodeToken interface: answers true when `it` exposes
  // truthy `token`, `tokenLength`, `success` and `atEnd` members, false otherwise.
  function Type_SourceCodeToken (it) {
    // null/undefined cannot implement anything; avoid the TypeError a member access would raise
    if (it === null || it === undefined) { return false }
    return Boolean(it.token && it.tokenLength && it.success && it.atEnd)
  }
  40. //@+node:caminhante.20210906214910.1: *3* Type_ParserCombinator
  41. // interface to a class that receives parsers and combines them
  // Answers true when `it` exposes a truthy `numberOfParsers` member, false otherwise.
  function Type_ParserCombinator (it) {
    // null/undefined cannot implement anything; avoid the TypeError a member access would raise
    if (it === null || it === undefined) { return false }
    return Boolean(it.numberOfParsers)
  }
  45. //@+node:caminhante.20210911190608.1: *3* Type_CombinedParsers
  46. // interface to a class composed of combined parsers
  // Answers true when `it` exposes truthy `numberOfParsers` and `intermediateParsers`
  // members, false otherwise.
  function Type_CombinedParsers (it) {
    // null/undefined cannot implement anything; avoid the TypeError a member access would raise
    if (it === null || it === undefined) { return false }
    return Boolean(it.numberOfParsers && it.intermediateParsers)
  }
  50. //@+node:caminhante.20210911191357.1: *3* Type_Parser
  // Answers true when `it` carries a truthy `canParse` marker, false otherwise.
  function Type_Parser (it) {
    // null/undefined cannot carry the marker; avoid the TypeError a member access would raise
    if (it === null || it === undefined) { return false }
    return Boolean(it.canParse)
  }
  54. //@+node:caminhante.20210807141215.1: ** /parsers
  55. //@+node:caminhante.20210807141221.1: *3* SourceCode
  56. // implements SourceCode
  // Wraps a string as the initial SourceCode cursor: the text itself plus a cursor
  // fixed at position 1, line 1, column 1.
  // Throws (through typeConstraint) when `_source` is not a string.
  function SourceCode (_source) {
    typeConstraint(_source,1,'string')
    return {
      source: function source () { return _source },
      pos: function pos () { return 1 },
      line: function line () { return 1 },
      column: function column () { return 1 }
    }
  }
  SourceCode.canParse = true
  66. //@+node:caminhante.20210807141232.1: *3* PeekChar
  67. // implements SourceCode, SourceCodeToken
  // Looks at the character under the cursor of `src` without moving the cursor.
  // The result shares source/pos/column with `src`; positions are 1-based.
  //   success()     - true when there is a character at src.pos()
  //   token()       - that character; throws 'PeekChar.token' when there is none
  //   tokenLength() - 1 on success, 0 otherwise
  //   line()        - the line of `src`; throws 'PeekChar.line <line>' when there is no character
  //   atEnd()       - true when the cursor is on the last character of the source
  // Throws 'PeekChar <reason>' when `src` does not satisfy Type_SourceCode.
  function PeekChar (src) {
    // The literal name replaces arguments.callee.name, which is unavailable in strict mode.
    try { typeConstraint(src,1,Type_SourceCode) } catch (e) { throw 'PeekChar '+e }
    // Accessors live at function level (not inside the try block) so that the returned
    // object does not depend on sloppy-mode hoisting of block-level functions.
    var source = src.source, pos = src.pos, column = src.column
    function thereIsAChar () { return src.source().length >= src.pos() }
    function success () { return thereIsAChar() }
    function line () {
      // Peeking does not consume anything, so the line is exactly the line of `src`
      // (the previous code returned src.pos() here, i.e. a position instead of a line).
      if (thereIsAChar()) { return src.line() }
      throw 'PeekChar.line '+src.line()
    }
    function token () {
      if (success()) { return src.source()[ src.pos()-1 ] }
      throw 'PeekChar.token'
    }
    function tokenLength () { return success() ? 1 : 0 }
    function atEnd () { return pos() === source().length }
    return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd}
  }
  PeekChar.canParse = true
  89. //@+node:caminhante.20210807141238.1: *3* NextChar
  90. // implements SourceCode, SourceCodeToken
  // Moves the cursor of `src` one character forward and exposes the character it lands on.
  // Positions, lines and columns are 1-based.
  //   success()     - true when a character exists after src.pos()
  //   pos()         - src.pos()+1; throws 'NextChar.pos <pos>' on failure
  //   line()        - line of the new cursor; throws 'NextChar.line <line>' on failure
  //   column()      - column of the new cursor; throws 'NextChar.column <column>' on failure
  //   token()       - the character at the new cursor; throws 'NextChar.token' on failure
  //   tokenLength() - 1 on success, 0 otherwise
  //   atEnd()       - true when the new cursor is on the last character (throws like pos() on failure)
  // Throws 'NextChar <reason>' when `src` does not satisfy Type_SourceCode.
  function NextChar (src) {
    // The literal name replaces arguments.callee.name, which is unavailable in strict mode.
    try { typeConstraint(src,1,Type_SourceCode) } catch (e) { throw 'NextChar '+e }
    // Accessors live at function level (not inside the try block) so that the returned
    // object does not depend on sloppy-mode hoisting of block-level functions.
    var source = src.source
    function thereIsANextChar () { return source().length >= src.pos()+1 }
    function success () { return thereIsANextChar() }
    // True when the character being stepped over (the one at src.pos()) is a linefeed:
    // the character after a linefeed is the first one of the following line.
    function leavingLinefeed () { return source()[ src.pos()-1 ] === '\n' }
    function pos () { if (success()) {return src.pos()+1} throw 'NextChar.pos '+src.pos() }
    function line () {
      if (success()) { return leavingLinefeed() ? src.line()+1 : src.line() }
      throw 'NextChar.line '+src.line()
    }
    function column () {
      // every step advances the column by one unless it starts a new line
      // (the previous code returned src.column() unchanged, so columns never moved)
      if (success()) { return leavingLinefeed() ? 1 : src.column()+1 }
      throw 'NextChar.column '+src.column()
    }
    function token () {
      if (success()) { return src.source()[ src.pos() ] }
      throw 'NextChar.token'
    }
    function tokenLength () { return (success() ? 1 : 0) }
    function atEnd () { return pos() === source().length }
    return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd}
  }
  NextChar.canParse = true
  117. //@+node:caminhante.20210807141242.1: *3* SkipWhitespace
  118. // implements SourceCode, SourceCodeToken
  // Consumes a run of blanks starting at the cursor of `src`.
  //   src        - SourceCode-like cursor
  //   noLinefeed - optional boolean; when truthy only ' ' is skipped, otherwise ' ' and '\n'
  // Result:
  //   success()     - true when at least one blank was consumed
  //   pos()/line()/column() - cursor after the run: the first character that was not
  //                   consumed, or the last blank when the run reaches the end of the source;
  //                   they throw 'SkipWhitespace.<name> <value>' on failure
  //   token()       - the consumed blanks; throws 'SkipWhitespace.token' on failure
  //   tokenLength() - number of consumed characters, 0 on failure
  //   atEnd()       - true when the run reaches the end of the source, false on failure
  // Throws 'SkipWhitespace <reason>' when an argument violates its type constraint.
  function SkipWhitespace (src,noLinefeed) {
    var source, p, lp, _success = false
    try {
      typeConstraint(src,1,Type_SourceCode)
      typeConstraint(noLinefeed,2,'bool')
      source = src.source
      var spaces = noLinefeed ? ' ' : ' \n'
      p = PeekChar(src)
      while (p.success() && spaces.indexOf(p.token()) !== -1) {
        _success = true
        lp = p
        p = NextChar(p)
      }
      // Ran off the end: fall back to the last consumed character. Without a consumed
      // character there is nothing to fall back to, so the initial cursor is kept
      // (it used to become undefined, which made every failure accessor crash).
      if (!p.success() && lp) { p = lp }
    } catch (e) { throw 'SkipWhitespace '+e }
    // Accessors live at function level so they do not rely on block-level function hoisting.
    function pos () { if (_success) {return p.pos()} throw 'SkipWhitespace.pos '+p.pos() }
    function line () { if (_success) {return p.line()} throw 'SkipWhitespace.line '+p.line() }
    function column () { if (_success) {return p.column()} throw 'SkipWhitespace.column '+p.column() }
    function token () {
      if ( _success ) { return source().slice(src.pos()-1, lp.pos()) }
      throw 'SkipWhitespace.token'
    }
    function tokenLength () { if (_success) {return lp.pos()-src.pos()+1} return 0 }
    function success () { return _success }
    // lp only exists after a successful match
    function atEnd () { return _success && lp.pos() === source().length }
    return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd}
  }
  SkipWhitespace.canParse = true
  146. //@+node:caminhante.20210807141245.1: *3* ReadLiteral
  147. // implements SourceCode, SourceCodeToken
  // Matches the fixed text `string` at the cursor of `src`.
  //   string - the text to match (an empty text matches without consuming anything)
  //   src    - SourceCode-like cursor
  // Result:
  //   success()     - true when the whole text was found
  //   pos()/line()/column() - cursor after the match: the first character that was not
  //                   consumed, or the last matched character when the match reaches the
  //                   end of the source; they throw 'ReadLiteral.<name> <value>' on failure
  //   token()       - the matched text; throws 'ReadLiteral.token' on failure
  //   tokenLength() - length of the matched text, 0 on failure
  //   atEnd()       - true when the last character that matched is the last character of
  //                   the source; false when no character matched
  // Throws 'ReadLiteral <reason>' when an argument violates its type constraint.
  function ReadLiteral (string, src) {
    var source, p, lp, ps = 0, _success = true
    try {
      typeConstraint(string,1,'string')
      typeConstraint(src,2,Type_SourceCode)
      source = src.source
      p = PeekChar(src)
      // the length test comes first so the cursor is not inspected once the text is complete
      while (ps < string.length && p.success() && p.token() === string[ps]) {
        lp = p
        p = NextChar(p)
        ps ++
      }
      if (ps !== string.length) { _success = false }
      // Ran off the end: fall back to the last matched character. When nothing matched
      // the initial cursor is kept (it used to become undefined and crash the accessors).
      if (!p.success() && lp) { p = lp }
    } catch (e) { throw 'ReadLiteral '+e }
    // Accessors live at function level so they do not rely on block-level function hoisting.
    function pos () { if (_success) {return p.pos()} throw 'ReadLiteral.pos '+p.pos() }
    function line () { if (_success) {return p.line()} throw 'ReadLiteral.line '+p.line() }
    function column () { if (_success) {return p.column()} throw 'ReadLiteral.column '+p.column() }
    function token () {
      if ( _success ) {
        // expressed through the match length so that an empty literal (no lp) works too
        var from = src.pos()-1
        return source().slice(from, from+string.length)
      }
      throw 'ReadLiteral.token'
    }
    function tokenLength () { if (_success) {return string.length} return 0 }
    function success () { return _success }
    function atEnd () { return lp ? lp.pos() === source().length : false }
    return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd}
  }
  ReadLiteral.canParse = true
  174. //@+node:caminhante.20210807161728.1: *3* ReadInteger
  175. // implements SourceCode, SourceCodeToken
  // Reads a run of decimal digits ([0-9]+) starting at the cursor of `src`.
  // Result:
  //   success()     - true when at least one digit was consumed
  //   pos()/line()/column() - cursor after the run: the first character that was not
  //                   consumed, or the last digit when the run reaches the end of the source;
  //                   they throw 'ReadInteger.<name> <value>' on failure
  //   token()       - the digits; throws 'ReadInteger.token' on failure
  //   tokenLength() - number of digits, 0 on failure
  //   atEnd()       - true when the run reaches the end of the source, false on failure
  // Throws 'ReadInteger <reason>' when `src` does not satisfy Type_SourceCode.
  function ReadInteger (src) {
    var source, p, lp, _success = false
    var digits = '0123456789'
    try {
      typeConstraint(src,1,Type_SourceCode)
      source = src.source
      p = PeekChar(src)
      // tokens are single characters, so membership in `digits` is the whole test
      while (p.success() && digits.indexOf(p.token()) !== -1) {
        _success = true
        lp = p
        p = NextChar(p)
      }
      // Ran off the end: fall back to the last digit. Without a digit the initial cursor
      // is kept (it used to become undefined, which made every failure accessor crash).
      if (!p.success() && lp) { p = lp }
    } catch (e) { throw 'ReadInteger '+e }
    // Accessors live at function level so they do not rely on block-level function hoisting.
    function pos () { if (_success) {return p.pos()} throw 'ReadInteger.pos '+p.pos() }
    function line () { if (_success) {return p.line()} throw 'ReadInteger.line '+p.line() }
    function column () { if (_success) {return p.column()} throw 'ReadInteger.column '+p.column() }
    function token () {
      if ( _success ) { return source().slice(src.pos()-1, lp.pos()) }
      throw 'ReadInteger.token'
    }
    function tokenLength () { if (_success) {return lp.pos()-src.pos()+1} return 0 }
    function success () { return _success }
    // lp only exists after a successful match
    function atEnd () { return _success && lp.pos() === source().length }
    return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd}
  }
  ReadInteger.canParse = true
  200. //@+node:caminhante.20210809175506.1: *3* ReadFloat
  201. // implements SourceCode, SourceCodeToken
  // Reads a decimal number at the cursor of `src`, following
  //   [+-]?([0-9]+([.,][0-9]*)?|[.,][0-9]+)
  // Result:
  //   success()     - true when the text at the cursor matches the pattern
  //   pos()/line()/column() - cursor after the match: the first character that was not
  //                   consumed, or the last matched character when the match reaches the
  //                   end of the source; they throw 'ReadFloat.<name> <value>' on failure
  //   token()       - the matched text; throws 'ReadFloat.token' on failure
  //   tokenLength() - length of the matched text, 0 on failure
  //   atEnd()       - true when the match reaches the end of the source, false on failure
  // Throws 'ReadFloat <reason>' when `src` does not satisfy Type_SourceCode or its
  // cursor cannot be read.
  function ReadFloat (src) {
    var source, start, end, cursor, _success = false, consumedAll = false
    try {
      typeConstraint(src,1,Type_SourceCode)
      source = src.source
      //@+others
      //@+node:caminhante.20210813000034.1: *4* float parsing
      // float regex: [+-]?([0-9]+([.,][0-9]*)?|[.,][0-9]+)
      // The match is computed on 0-based indexes: [start, end) is the matched range.
      // Working on indexes keeps "how much was consumed" apart from "where the cursor
      // is", which the cursor objects cannot tell apart at the end of the source.
      var text = source()
      start = src.pos()-1
      end = start
      // step 1: [-+]?
      var i = start
      if (charIsIn(text,i,'-+')) { i++ }
      // step 2: [0-9]+ <step 3> | [.,] <step 4>
      var intEnd = skipDigits(text,i)
      if (intEnd > i) {
        _success = true
        end = intEnd
        // step 3: ([.,][0-9]*)?
        if (charIsIn(text,end,'.,')) { end = skipDigits(text,end+1) }
      } else if (charIsIn(text,i,'.,')) {
        // step 4: [0-9]+
        var fracEnd = skipDigits(text,i+1)
        if (fracEnd > i+1) { _success = true; end = fracEnd }
      }
      consumedAll = _success && end === text.length
      // Build the resulting cursor by stepping over the matched characters. A cursor
      // cannot move past the last character, so a match that consumes the whole
      // source stays on its last character and reports atEnd() instead.
      cursor = PeekChar(src)
      if (_success) {
        var hops = end - start - (consumedAll ? 1 : 0)
        while (hops > 0) { cursor = NextChar(cursor); hops-- }
      }
      //@-others
    } catch (e) { throw 'ReadFloat '+e }
    // Helpers and accessors live at function level so they do not rely on block-level
    // function hoisting.
    function isIn (value,array) { return array.indexOf(value) !== -1 }
    // true when index k exists in `text` and holds one of `chars`
    function charIsIn (text,k,chars) { return k < text.length && isIn(text.charAt(k),chars) }
    // index of the first non-digit at or after k
    function skipDigits (text,k) { while (charIsIn(text,k,'0123456789')) { k++ } return k }
    function pos () { if (_success) {return cursor.pos()} throw 'ReadFloat.pos '+src.pos() }
    function line () { if (_success) {return cursor.line()} throw 'ReadFloat.line '+src.line() }
    function column () { if (_success) {return cursor.column()} throw 'ReadFloat.column '+src.column() }
    function token () {
      if ( _success ) { return source().slice(start, end) }
      throw 'ReadFloat.token'
    }
    function tokenLength () { if (_success) {return end-start} return 0 }
    function success () { return _success }
    function atEnd () { return consumedAll }
    return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd}
  }
  ReadFloat.canParse = true
  255. //@+node:caminhante.20210807161749.1: *3* ReadString
  256. // implements SourceCode, SourceCodeToken
  // Reads a quoted string at the cursor of `src`. The string starts with " or ' and ends
  // at the next occurrence of the same quote that is not preceded by a backslash escape.
  // Result:
  //   success()     - true when both the opening and the closing quote were found
  //   pos()/line()/column() - cursor after the match: the first character after the
  //                   closing quote, or the closing quote itself when it is the last
  //                   character of the source; they throw 'ReadString.<name> <value>' on failure
  //   token()       - the string including its quotes; throws 'ReadString.token' on failure
  //   tokenLength() - length of that text, 0 on failure
  //   atEnd()       - true when the closing quote is the last character of the source,
  //                   false on failure
  // Throws 'ReadString <reason>' when `src` does not satisfy Type_SourceCode.
  function ReadString (src) {
    var source, p, lp, _success = false, firstQuote
    try {
      typeConstraint(src,1,Type_SourceCode)
      source = src.source
      p = PeekChar(src)
      //@+others
      //@+node:caminhante.20210815211725.1: *4* string parsing
      // step 1: first quote
      if (p.success() && (p.token() === '"' || p.token() === "'")) {
        firstQuote = p.token()
        // `scan` runs ahead and may fall off the source; `p` always stays on the last
        // character that really exists, so the accessors below never see a dead cursor
        var scan = NextChar(p)
        // step 2: search for the closing-quote ignoring escaped chars
        while (scan.success() && !_success) {
          p = scan
          var ch = scan.token()
          if (ch === '\\') {
            // skip the escaped character; a backslash at the very end escapes nothing
            // and leaves the string unterminated
            scan = NextChar(scan)
            if (!scan.success()) { break }
            p = scan
          } else if (ch === firstQuote) {
            lp = scan
            _success = true
          }
          scan = NextChar(scan)
        }
        // move past the closing quote only when something follows it
        if (_success && scan.success()) { p = scan }
      }
      //@-others
    } catch (e) { throw 'ReadString '+e }
    // Accessors live at function level so they do not rely on block-level function hoisting.
    function pos () { if (_success) {return p.pos()} throw 'ReadString.pos '+p.pos() }
    function line () { if (_success) {return p.line()} throw 'ReadString.line '+p.line() }
    function column () { if (_success) {return p.column()} throw 'ReadString.column '+p.column() }
    function token () {
      if ( _success ) { return source().slice(src.pos()-1, lp.pos()) }
      throw 'ReadString.token'
    }
    function tokenLength () { if (_success) {return lp.pos()-src.pos()+1} return 0 }
    function success () { return _success }
    // lp only exists after a successful match
    function atEnd () { return lp ? lp.pos() === source().length : false }
    return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd}
  }
  ReadString.canParse = true
  291. //@+node:caminhante.20210815211617.1: *3* ReadId
  292. // implements SourceCode, SourceCodeToken
  // Reads an identifier at the cursor of `src`: a letter, '_', '$' or '.', followed by
  // any number of those characters, digits or '-'.
  // Result:
  //   success()     - true when an identifier was found
  //   pos()/line()/column() - cursor after the match: the first character that was not
  //                   consumed, or the last identifier character when the identifier
  //                   reaches the end of the source; they throw 'ReadId.<name> <value>' on failure
  //   token()       - the identifier; throws 'ReadId.token' on failure
  //   tokenLength() - length of the identifier, 0 on failure
  //   atEnd()       - true when the identifier reaches the end of the source, false on failure
  // Throws 'ReadId <reason>' when `src` does not satisfy Type_SourceCode.
  function ReadId (src) {
    var source, p, lp, _success = false
    try {
      typeConstraint(src,1,Type_SourceCode)
      source = src.source
      p = PeekChar(src)
      //@+others
      //@+node:caminhante.20210815212701.1: *4* id parsing
      var validFirstChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$."
      var validChars = validFirstChars + "0123456789-"
      if (p.success() && validFirstChars.indexOf(p.token()) !== -1) {
        _success = true
        lp = p
        p = NextChar(p)
        while (p.success() && validChars.indexOf(p.token()) !== -1) {
          lp = p
          p = NextChar(p)
        }
      }
      // Ran off the end: fall back to the last identifier character. Without one the
      // initial cursor is kept (it used to become undefined and crash the accessors).
      if (!p.success() && lp) { p = lp }
      //@-others
    } catch (e) { throw 'ReadId '+e }
    // Accessors live at function level so they do not rely on block-level function hoisting.
    function pos () { if (_success) {return p.pos()} throw 'ReadId.pos '+p.pos() }
    function line () { if (_success) {return p.line()} throw 'ReadId.line '+p.line() }
    function column () { if (_success) {return p.column()} throw 'ReadId.column '+p.column() }
    function token () {
      if ( _success ) { return source().slice(src.pos()-1, lp.pos()) }
      throw 'ReadId.token'
    }
    function tokenLength () { if (_success) {return lp.pos()-src.pos()+1} return 0 }
    function success () { return _success }
    // lp only exists after a successful match
    function atEnd () { return _success && lp.pos() === source().length }
    return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd}
  }
  ReadId.canParse = true
  327. //@+node:caminhante.20210904200731.1: ** /combinadores
  328. //@+node:caminhante.20210905235355.1: *3* ParseFixedLiteral
  329. // generates a parser function that parses a fixed string
  // Builds a parser for one fixed text.
  //   lit - the text the generated parser must find
  // Returns ReadFixedLiteral(s): applies ReadLiteral(lit, s) and adds numberOfParsers()
  // (always 1) and intermediateParsers() (always an empty array) to its result. The
  // returned function itself also carries numberOfParsers() and canParse.
  // Throws 'ParseFixedLiteral <reason>' when `lit` violates its type constraint; the
  // generated parser throws 'ReadFixedLiteral <reason>' when ReadLiteral throws.
  function ParseFixedLiteral (lit) {
    // The literal names replace arguments.callee.name, which is unavailable in strict mode.
    try { typeConstraint(lit,1,'string') } catch (e) { throw 'ParseFixedLiteral '+e }
    // ReadFixedLiteral is declared at function level (not inside a try block) so that
    // returning it does not depend on sloppy-mode hoisting of block-level functions.
    //@+others
    //@+node:caminhante.20210905235606.1: *4* ReadFixedLiteral
    function ReadFixedLiteral (s) {
      var r
      try { r = ReadLiteral(lit,s) } catch (e) { throw 'ReadFixedLiteral '+e }
      r.numberOfParsers = function () { return 1 }
      r.intermediateParsers = function () { return [] }
      return r
    }
    ReadFixedLiteral.numberOfParsers = function () { return 1 }
    ReadFixedLiteral.canParse = true
    //@-others
    return ReadFixedLiteral
  }
  ParseFixedLiteral.canParse = true
  350. //@+node:caminhante.20210905231641.1: *3* ParseSeq
  351. // combines two or more parsers into a new parser that applies them in sequence
  // Combines the given parsers into one parser that applies them one after another,
  // each starting at the cursor returned by the previous one.
  //   arguments - the parsers to chain; each must satisfy Type_Parser
  // Returns seqCombinedParser(src). Its result:
  //   success()     - true when every parser succeeded
  //   pos()/line()/column() - cursor of the last parser's result; they throw
  //                   'seqCombinedParser.<name> <value>' on failure
  //   token()       - the text covered by the whole sequence; throws
  //                   'seqCombinedParser.token' on failure
  //   tokenLength() - length of that text, 0 on failure
  //   atEnd()       - atEnd() of the last parser's result, false on failure
  //   numberOfParsers()     - how many parsers were combined
  //   intermediateParsers() - the results of the parsers that were applied, in order
  // Throws 'ParseSeq <reason>' when an argument violates its type constraint; the
  // generated parser throws 'seqCombinedParser <reason>' when `src` is invalid or a
  // parser throws.
  function ParseSeq () {
    var parsers = [].slice.apply(arguments)
    // The literal names replace arguments.callee.name, which is unavailable in strict mode.
    try {
      // argument positions are reported 1-based, as everywhere else in this file
      parsers.forEach( function(p,i){ typeConstraint(p,i+1,Type_Parser) } )
    } catch (e) { throw 'ParseSeq '+e }
    // The functions below are declared at function level (not inside a try block) so
    // that returning them does not depend on sloppy-mode hoisting of block-level functions.
    //@+others
    //@+node:caminhante.20210905232810.1: *4* seqCombinedParser
    function numberOfParsers () { return parsers.length }
    function seqCombinedParser (src) {
      var source, p, lp, ps = [], _success = false, c = 0
      try {
        typeConstraint(src,1,Type_SourceCode)
        source = src.source
        p = PeekChar(src)
        // NOTE(review): a result that is atEnd() keeps its cursor on the last consumed
        // character, so the next parser starts on that character again - confirm
        // whether a sequence should fail instead once the input is used up.
        while (p.success() && c < parsers.length) {
          lp = p
          p = parsers[c](p)
          ps.push(p)
          c++
        }
        // On failure fall back to the cursor handed to the failing parser. When no
        // parser ran at all the initial cursor is kept (it used to become undefined,
        // which made every failure accessor crash).
        if (p.success()) {_success = true} else if (lp) { p = lp }
      } catch (e) { throw 'seqCombinedParser '+e }
      function pos () { if (_success) {return p.pos()} throw 'seqCombinedParser.pos '+p.pos() }
      function line () { if (_success) {return p.line()} throw 'seqCombinedParser.line '+p.line() }
      function column () { if (_success) {return p.column()} throw 'seqCombinedParser.column '+p.column() }
      function token () {
        // at the end of the source the cursor sits on the last consumed character, so
        // that character has to be added to the slice
        if ( _success ) { return source().slice(src.pos()-1, p.pos()-1+atEnd()) }
        throw 'seqCombinedParser.token'
      }
      function tokenLength () { if (_success) {return p.pos()-src.pos()+atEnd()} return 0 }
      function success () { return _success }
      // only a successful, non-empty sequence has a last result worth asking
      function atEnd () { return _success && ps.length > 0 && last(ps).atEnd() }
      function intermediateParsers () { return ps }
      return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd,
        numberOfParsers:numberOfParsers,intermediateParsers:intermediateParsers}
    }
    seqCombinedParser.numberOfParsers = numberOfParsers
    seqCombinedParser.canParse = true
    //@-others
    return seqCombinedParser
  }
  ParseSeq.canParse = true
  393. //@-others
  394. //@-leo