123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395 |
- //@+leo-ver=5-thin
- //@+node:caminhante.20210807141115.1: * @file parser.js
- //@@language javascript
- //@@tabwidth -2
- //@+others
- //@+node:caminhante.20210815184051.1: ** /funções auxiliares
- //@+node:caminhante.20210815184057.1: *3* typeConstraint
// Checks that `obj` satisfies the constraint `type`; throws a descriptive string otherwise.
//   obj    - value under test
//   argpos - position of the argument in the caller's signature (only used in the message)
//   type   - 'string' | 'integer' | 'number' | 'bool', or a predicate function called with `obj`
// Throws (a string): "Type constraint <type> violated at <caller>->arg(<argpos>)"
function typeConstraint (obj, argpos, type) {
  var ownArguments = arguments
  // Best-effort name of the function that called typeConstraint. It is resolved
  // only when a violation is reported, so valid calls work from any context
  // (top-level code, strict mode, ES modules).
  function callerName () {
    try {
      // Sloppy-mode path; strict mode raises a TypeError on `callee`/`caller`.
      var caller = ownArguments.callee.caller
      if (caller) { return caller.name }
    } catch (e) { /* fall back to the stack trace below */ }
    var frames = String(new Error().stack || '').split('\n')
    for (var i = 0; i < frames.length-1; i++) {
      var frame = frames[i]
      if (/^\s*at typeConstraint\b/.test(frame) || /^typeConstraint@/.test(frame)) {
        // V8 prints "at name (location)"; other engines print "name@location"
        var following = frames[i+1]
        var m = /^\s*at (?:async )?(?:new )?([^\s(]+) \(/.exec(following) || /^([^@\s]+)@/.exec(following)
        if (m) { return m[1].split('.').pop() }
        break
      }
    }
    return '<anonymous>'
  }
  function error (c) {
    throw "Type constraint "+c+" violated at "+callerName()+"->arg("+argpos+")"
  }
  switch (type) {
    case 'string':
      if (typeof(obj) != 'string' && !(obj instanceof String)) {error(type)}
      break
    case 'integer':
      // any finite number without a fractional part, not only 32-bit values
      if (typeof(obj) !== 'number' || !isFinite(obj) || Math.floor(obj) !== obj) {error(type)}
      break
    case 'number':
      // numeric strings are accepted as well
      if (isNaN(obj) || isNaN(parseFloat(obj))) {error(type)}
      break
    case 'bool':
      // `undefined` is accepted so that optional flags can be omitted
      if (typeof(obj) !== 'undefined' && typeof(obj) !== 'boolean') {error(type)}
      break
    default:
      // `type` is a predicate function; its name identifies the constraint
      if (!type(obj)) {error(type.name)}
  }
}
- //@+node:caminhante.20210813002512.1: *3* atEOS
// Tells whether the cursor of `src` stands on the last character of the source text.
//   src - any object satisfying Type_SourceCode
function atEOS (src) {
  typeConstraint(src,1,Type_SourceCode)
  var cursor = src.pos()
  var size = src.source().length
  return cursor == size
}
- //@+node:caminhante.20210904195538.1: *3* last
// Returns the final element of an indexable value (undefined when it is empty).
function last (v) {
  var tail = v.length - 1
  return v[tail]
}
- //@+node:caminhante.20210807141136.1: ** /interfaces
- //@+node:caminhante.20210807141201.1: *3* Type_SourceCode
// Interface check: `it` exposes truthy `source`, `pos`, `line` and `column` members.
// Returns a boolean; null and undefined simply fail the check instead of raising.
function Type_SourceCode (it) {
  if (it === null || it === undefined) { return false }
  return Boolean(it.source && it.pos && it.line && it.column)
}
- //@+node:caminhante.20210807141204.1: *3* Type_SourceCodeToken
// Interface check: `it` exposes truthy `token`, `tokenLength`, `success` and `atEnd` members.
// Returns a boolean; null and undefined simply fail the check instead of raising.
function Type_SourceCodeToken (it) {
  if (it === null || it === undefined) { return false }
  return Boolean(it.token && it.tokenLength && it.success && it.atEnd)
}
- //@+node:caminhante.20210906214910.1: *3* Type_ParserCombinator
// interface to a class that receives parsers and combines them
// Interface check: `it` exposes a truthy `numberOfParsers` member.
// Returns a boolean; null and undefined simply fail the check instead of raising.
function Type_ParserCombinator (it) {
  if (it === null || it === undefined) { return false }
  return Boolean(it.numberOfParsers)
}
- //@+node:caminhante.20210911190608.1: *3* Type_CombinedParsers
// interface to a class composed of combined parsers
// Interface check: `it` exposes truthy `numberOfParsers` and `intermediateParsers` members.
// Returns a boolean; null and undefined simply fail the check instead of raising.
function Type_CombinedParsers (it) {
  if (it === null || it === undefined) { return false }
  return Boolean(it.numberOfParsers && it.intermediateParsers)
}
- //@+node:caminhante.20210911191357.1: *3* Type_Parser
// Interface check: `it` carries a truthy `canParse` flag.
// Returns a boolean; null and undefined simply fail the check instead of raising.
function Type_Parser (it) {
  if (it === null || it === undefined) { return false }
  return Boolean(it.canParse)
}
- //@+node:caminhante.20210807141215.1: ** /parsers
- //@+node:caminhante.20210807141221.1: *3* SourceCode
// implements SourceCode
// Wraps a raw string as the starting cursor: pos(), line() and column() all report 1.
//   _source - the text to parse (must satisfy the 'string' constraint)
function SourceCode (_source) {
  typeConstraint(_source,1,'string')
  return {
    source: function source () { return _source },
    pos: function pos () { return 1 },
    line: function line () { return 1 },
    column: function column () { return 1 }
  }
}
SourceCode.canParse = true
- //@+node:caminhante.20210807141232.1: *3* PeekChar
// implements SourceCode, SourceCodeToken
// Looks at the character under the cursor of `src` without moving the cursor.
//   src - any object satisfying Type_SourceCode
// Returns a token object whose source/pos/column are taken from `src` unchanged.
//   success()     - true when pos() (1-based) is inside the source
//   token()       - the character at pos(); throws 'PeekChar.token' when success() is false
//   tokenLength() - 1 on success, 0 otherwise
//   line()        - line of `src`; throws 'PeekChar.line <line>' when success() is false
//   atEnd()       - true when pos() equals the source length
// Throws (a string): 'PeekChar <reason>' when `src` fails the type constraint.
function PeekChar (src) {
  try {
    typeConstraint(src,1,Type_SourceCode)
  } catch (e) { throw 'PeekChar '+e }
  var source = src.source, pos = src.pos, column = src.column
  function success () { return src.source().length >= src.pos() }
  function token () {
    if ( success() ) { return src.source()[ src.pos()-1 ] }
    throw 'PeekChar.token'
  }
  function line () {
    if (!success()) { throw 'PeekChar.line '+src.line() }
    // A line feed under the cursor is counted as part of the following line.
    // NOTE(review): peeking a cursor whose line() already counted that line feed
    // counts it a second time - confirm which layer should own the increment.
    return token() == '\n' ? src.line()+1 : src.line()
  }
  function tokenLength () { return success() ? 1 : 0 }
  function atEnd () { return pos() == source().length }
  return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd}
}
PeekChar.canParse = true
- //@+node:caminhante.20210807141238.1: *3* NextChar
// implements SourceCode, SourceCodeToken
// Moves one character forward from the cursor of `src`.
//   src - any object satisfying Type_SourceCode
// Returns a token object evaluated lazily against `src`:
//   success()     - true when a character exists after src.pos()
//   pos()         - src.pos()+1; throws 'NextChar.pos <pos>' when success() is false
//   token()       - the character moved onto; throws 'NextChar.token' on failure
//   tokenLength() - 1 on success, 0 otherwise
//   line()        - src.line(), plus one when the character moved onto is a line feed
//   column()      - 1 when the character moved onto is a line feed, else src.column()+1
//   atEnd()       - true when pos() equals the source length (throws like pos() on failure)
// Throws (a string): 'NextChar <reason>' when `src` fails the type constraint.
function NextChar (src) {
  try {
    typeConstraint(src,1,Type_SourceCode)
  } catch (e) { throw 'NextChar '+e }
  var source = src.source
  function success () { return source().length >= src.pos()+1 }
  // src.pos() is 1-based, so it is also the 0-based index of the following character
  function linefeedCase () { return source()[ src.pos() ] == '\n' }
  function pos () { if (success()) {return src.pos()+1} throw 'NextChar.pos '+src.pos() }
  function line () {
    if (success()) { return linefeedCase() ? src.line()+1 : src.line() }
    throw 'NextChar.line '+src.line()
  }
  function column () {
    // NOTE(review): the line feed itself is reported at column 1 of the new line,
    // so the first visible character of that line is column 2 - confirm the convention.
    if (success()) { return linefeedCase() ? 1 : src.column()+1 }
    throw 'NextChar.column '+src.column()
  }
  function token () {
    if (success()) { return src.source()[ src.pos() ] }
    throw 'NextChar.token'
  }
  function tokenLength () { return (success() ? 1 : 0) }
  function atEnd () { return pos() == source().length }
  return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd}
}
NextChar.canParse = true
- //@+node:caminhante.20210807141242.1: *3* SkipWhitespace
// implements SourceCode, SourceCodeToken
// Consumes a run of blanks starting at the cursor of `src`.
//   src        - any object satisfying Type_SourceCode
//   noLinefeed - optional boolean; when truthy only ' ' is skipped, otherwise ' ' and '\n'
// Returns a token object. The parse succeeds when at least one blank was consumed.
//   pos()/line()/column() - taken from the first character after the run, or from the last
//                           blank when the run reaches the end of the source;
//                           they throw 'SkipWhitespace.<name> <value>' on failure
//   token()       - the consumed blanks; throws 'SkipWhitespace.token' on failure
//   tokenLength() - number of consumed blanks, 0 on failure
//   atEnd()       - true when the last consumed blank is the last character of the source
// Throws (a string): 'SkipWhitespace <reason>' when an argument fails its constraint.
function SkipWhitespace (src,noLinefeed) {
  try {
    typeConstraint(src,1,Type_SourceCode)
    typeConstraint(noLinefeed,2,'bool')
    var source = src.source, spaces = ' '
    if (!noLinefeed) {spaces += '\n'}
    var p = PeekChar(src), lp, _success = false
    while (p.success() && spaces.indexOf(p.token()) != -1) {
      _success = true
      lp = p
      p = NextChar(p)
    }
    // Past the end there is no character to stand on: stay on the last consumed one.
    // When nothing was consumed `p` keeps the initial peek so failures can still report a position.
    if (!p.success() && lp) { p = lp }
  } catch (e) { throw 'SkipWhitespace '+e }
  function pos () { if (_success) {return p.pos()} throw 'SkipWhitespace.pos '+p.pos() }
  function line () { if (_success) {return p.line()} throw 'SkipWhitespace.line '+p.line() }
  function column () { if (_success) {return p.column()} throw 'SkipWhitespace.column '+p.column() }
  function token () {
    if ( _success ) { return source().slice(src.pos()-1, lp.pos()) }
    throw 'SkipWhitespace.token'
  }
  function tokenLength () { if (_success) {return lp.pos()-src.pos()+1} return 0 }
  function success () { return _success }
  function atEnd () { return lp.pos() == source().length }
  return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd}
}
SkipWhitespace.canParse = true
- //@+node:caminhante.20210807141245.1: *3* ReadLiteral
// implements SourceCode, SourceCodeToken
// Matches the exact text `string` starting at the cursor of `src`.
//   string - the literal to match
//   src    - any object satisfying Type_SourceCode
// Returns a token object. The parse succeeds when every character of `string` matched
// (an empty literal matches without consuming anything).
//   pos()/line()/column() - taken from the first character after the match, or from the last
//                           matched character when the match reaches the end of the source;
//                           they throw 'ReadLiteral.<name> <value>' on failure
//   token()       - the matched text; throws 'ReadLiteral.token' on failure
//   tokenLength() - number of matched characters, 0 on failure
//   atEnd()       - true when the last matched character is the last character of the source
// Throws (a string): 'ReadLiteral <reason>' when an argument fails its constraint.
function ReadLiteral (string, src) {
  try {
    typeConstraint(string,1,'string')
    typeConstraint(src,2,Type_SourceCode)
    var source = src.source, p = PeekChar(src), ps = 0, lp, _success = true
    while (p.success() && p.token() == string[ps] && ps < string.length) {
      lp = p
      p = NextChar(p)
      ps ++
    }
    if (ps != string.length) { _success = false }
    // Past the end there is no character to stand on: stay on the last matched one.
    // When nothing matched `p` keeps the initial peek so failures can still report a position.
    if (!p.success() && lp) { p = lp }
  } catch (e) { throw 'ReadLiteral '+e }
  function pos () { if (_success) {return p.pos()} throw 'ReadLiteral.pos '+p.pos() }
  function line () { if (_success) {return p.line()} throw 'ReadLiteral.line '+p.line() }
  function column () { if (_success) {return p.column()} throw 'ReadLiteral.column '+p.column() }
  function token () {
    // `lp` is unset only when the literal is empty
    if ( _success ) { return lp ? source().slice(src.pos()-1, lp.pos()) : '' }
    throw 'ReadLiteral.token'
  }
  function tokenLength () { if (_success && lp) {return lp.pos()-src.pos()+1} return 0 }
  function success () { return _success }
  function atEnd () { return lp ? lp.pos() == source().length : false }
  return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd}
}
ReadLiteral.canParse = true
- //@+node:caminhante.20210807161728.1: *3* ReadInteger
// implements SourceCode, SourceCodeToken
// Reads a run of decimal digits starting at the cursor of `src`.
//   src - any object satisfying Type_SourceCode
// Returns a token object. The parse succeeds when at least one digit was read.
//   pos()/line()/column() - taken from the first character after the digits, or from the last
//                           digit when the run reaches the end of the source;
//                           they throw 'ReadInteger.<name> <value>' on failure
//   token()       - the digits read; throws 'ReadInteger.token' on failure
//   tokenLength() - number of digits read, 0 on failure
//   atEnd()       - true when the last digit is the last character of the source
// Throws (a string): 'ReadInteger <reason>' when `src` fails the type constraint.
function ReadInteger (src) {
  try {
    typeConstraint(src,1,Type_SourceCode)
    var source = src.source, p = PeekChar(src), lp, _success = false
    // each token is a single character, so parseInt acts as an "is a decimal digit" test
    while (p.success() && !isNaN(parseInt(p.token(), 10))) {
      _success = true
      lp = p
      p = NextChar(p)
    }
    // Past the end there is no character to stand on: stay on the last digit.
    // When nothing was read `p` keeps the initial peek so failures can still report a position.
    if (!p.success() && lp) { p = lp }
  } catch (e) { throw 'ReadInteger '+e }
  function pos () { if (_success) {return p.pos()} throw 'ReadInteger.pos '+p.pos() }
  function line () { if (_success) {return p.line()} throw 'ReadInteger.line '+p.line() }
  function column () { if (_success) {return p.column()} throw 'ReadInteger.column '+p.column() }
  function token () {
    if ( _success ) { return source().slice(src.pos()-1, lp.pos()) }
    throw 'ReadInteger.token'
  }
  function tokenLength () { if (_success) {return lp.pos()-src.pos()+1} return 0 }
  function success () { return _success }
  function atEnd () { return lp.pos() == source().length }
  return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd}
}
ReadInteger.canParse = true
- //@+node:caminhante.20210809175506.1: *3* ReadFloat
// implements SourceCode, SourceCodeToken
// Reads a decimal number starting at the cursor of `src`, following the pattern
// [+-]?([0-9]+([.,][0-9]*)?|[.,][0-9]+)  - either '.' or ',' is accepted as separator.
//   src - any object satisfying Type_SourceCode
// Returns a token object. Input that does not match (including a lone sign or a lone
// separator at the end of the source) yields a failed parse rather than an exception.
//   pos()/line()/column() - taken from the first character after the number, or from its last
//                           character when the number reaches the end of the source;
//                           they throw 'ReadFloat.<name> <value of src>' on failure
//   token()       - the text of the number; throws 'ReadFloat.token' on failure
//   tokenLength() - number of characters read, 0 on failure
//   atEnd()       - true when the number's last character is the last character of the source
// Throws (a string): 'ReadFloat <reason>' when `src` fails the type constraint.
function ReadFloat (src) {
  try {
    typeConstraint(src,1,Type_SourceCode)
    // `p` stands on the next unread character, `lp` on the last consumed one
    var source = src.source, p = PeekChar(src), lp, _success = false
    //@+others
    //@+node:caminhante.20210813000034.1: *4* float parsing
    // float regex: [+-]?([0-9]+([.,][0-9]*)?|[.,][0-9]+)
    // consumes the character under `p` when it is one of `chars`
    var accept = function (chars) {
      if (!p.success() || chars.indexOf(p.token()) == -1) { return false }
      lp = p
      p = NextChar(p)
      return true
    }
    // consumes a run of digits and returns how many were read
    var acceptDigits = function () {
      var count = 0
      while (accept('0123456789')) { count++ }
      return count
    }
    // step 1: [-+]?
    accept('-+')
    // step 2: [0-9]+ <step 3> | [.,] <step 4>
    if (acceptDigits() > 0) {
      _success = true
      // step 3: ([.,][0-9]*)?
      if (accept('.,')) { acceptDigits() }
    } else if (accept('.,')) {
      // step 4: [0-9]+
      _success = acceptDigits() > 0
    }
    // Past the end there is no character to stand on: stay on the last consumed one.
    if (!p.success() && lp) { p = lp }
    //@-others
  } catch (e) { throw 'ReadFloat '+e }
  function pos () { if (_success) {return p.pos()} throw 'ReadFloat.pos '+src.pos() }
  function line () { if (_success) {return p.line()} throw 'ReadFloat.line '+src.line() }
  function column () { if (_success) {return p.column()} throw 'ReadFloat.column '+src.column() }
  function token () {
    if ( _success ) { return source().slice(src.pos()-1, lp.pos()) }
    throw 'ReadFloat.token'
  }
  function tokenLength () { if (_success) {return lp.pos()-src.pos()+1} return 0 }
  function success () { return _success }
  function atEnd () { return lp.pos() == source().length }
  return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd}
}
ReadFloat.canParse = true
- //@+node:caminhante.20210807161749.1: *3* ReadString
// implements SourceCode, SourceCodeToken
// Reads a quoted string starting at the cursor of `src`. The string may be delimited by
// '"' or "'"; a backslash makes the following character part of the string unexamined.
//   src - any object satisfying Type_SourceCode
// Returns a token object. The parse succeeds when the matching closing quote is found;
// an unterminated string (including one ending in a dangling backslash) is a failed parse.
//   pos()/line()/column() - taken from the first character after the closing quote, or from
//                           the closing quote itself when it is the last character of the
//                           source; they throw 'ReadString.<name> <value>' on failure
//   token()       - the string including both quotes; throws 'ReadString.token' on failure
//   tokenLength() - length of token(), 0 on failure
//   atEnd()       - true when the closing quote is the last character of the source
// Throws (a string): 'ReadString <reason>' when `src` fails the type constraint.
function ReadString (src) {
  try {
    typeConstraint(src,1,Type_SourceCode)
    var source = src.source, p = PeekChar(src), lp, _success = false, firstQuote
    //@+others
    //@+node:caminhante.20210815211725.1: *4* string parsing
    // step 1: first quote
    if (p.success() && (p.token() == '"' || p.token() == "'")) {
      firstQuote = p.token()
      var next = NextChar(p)
      // step 2: search for the closing-quote ignoring escaped chars
      // `p` only ever holds a cursor that stands on a real character
      while (next.success() && !_success) {
        p = next
        if (p.token() == '\\') {
          next = NextChar(p)
          if (!next.success()) { break } // dangling backslash at the end of the source
          p = next // the escaped character is consumed without being examined
        } else if (p.token() == firstQuote) {
          lp = p
          _success = true
        }
        next = NextChar(p)
      }
      // step past the closing quote when something follows it
      if (_success && next.success()) { p = next }
    }
    //@-others
  } catch (e) { throw 'ReadString '+e }
  function pos () { if (_success) {return p.pos()} throw 'ReadString.pos '+p.pos() }
  function line () { if (_success) {return p.line()} throw 'ReadString.line '+p.line() }
  function column () { if (_success) {return p.column()} throw 'ReadString.column '+p.column() }
  function token () {
    if ( _success ) { return source().slice(src.pos()-1, lp.pos()) }
    throw 'ReadString.token'
  }
  function tokenLength () { if (_success) {return lp.pos()-src.pos()+1} return 0 }
  function success () { return _success }
  function atEnd () { return lp.pos() == source().length }
  return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd}
}
ReadString.canParse = true
- //@+node:caminhante.20210815211617.1: *3* ReadId
// implements SourceCode, SourceCodeToken
// Reads an identifier starting at the cursor of `src`. The first character must be an
// ASCII letter, '_', '$' or '.'; later characters may also be decimal digits or '-'.
//   src - any object satisfying Type_SourceCode
// Returns a token object. The parse succeeds when at least one character was read.
//   pos()/line()/column() - taken from the first character after the identifier, or from its
//                           last character when it reaches the end of the source;
//                           they throw 'ReadId.<name> <value>' on failure
//   token()       - the identifier; throws 'ReadId.token' on failure
//   tokenLength() - length of the identifier, 0 on failure
//   atEnd()       - true when the identifier's last character is the last one of the source
// Throws (a string): 'ReadId <reason>' when `src` fails the type constraint.
function ReadId (src) {
  try {
    typeConstraint(src,1,Type_SourceCode)
    var source = src.source, p = PeekChar(src), lp, _success = false
    //@+others
    //@+node:caminhante.20210815212701.1: *4* id parsing
    var validFirstChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$."
    var validChars = validFirstChars + "0123456789-"
    if (p.success() && validFirstChars.indexOf(p.token())!=-1) {
      _success = true
      lp = p
      p = NextChar(p)
      while (p.success() && validChars.indexOf(p.token())!=-1) {
        lp = p
        p = NextChar(p)
      }
    }
    // Past the end there is no character to stand on: stay on the last consumed one.
    // When nothing was read `p` keeps the initial peek so failures can still report a position.
    if (!p.success() && lp) { p = lp }
    //@-others
  } catch (e) { throw 'ReadId '+e }
  function pos () { if (_success) {return p.pos()} throw 'ReadId.pos '+p.pos() }
  function line () { if (_success) {return p.line()} throw 'ReadId.line '+p.line() }
  function column () { if (_success) {return p.column()} throw 'ReadId.column '+p.column() }
  function token () {
    if ( _success ) { return source().slice(src.pos()-1, lp.pos()) }
    throw 'ReadId.token'
  }
  function tokenLength () { if (_success) {return lp.pos()-src.pos()+1} return 0 }
  function success () { return _success }
  function atEnd () { return lp.pos() == source().length }
  return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd}
}
ReadId.canParse = true
- //@+node:caminhante.20210904200731.1: ** /combinadores
- //@+node:caminhante.20210905235355.1: *3* ParseFixedLiteral
// generates a parser function that parses a fixed string
//   lit - the literal text the generated parser matches
// Returns ReadFixedLiteral, a parser taking a source cursor. It is tagged with `canParse`
// and a `numberOfParsers()` that reports 1; its result is the ReadLiteral result extended
// with `numberOfParsers()` (1) and `intermediateParsers()` (an empty array).
// Throws (a string): 'ParseFixedLiteral <reason>' when `lit` fails the 'string' constraint.
function ParseFixedLiteral (lit) {
  try {
    typeConstraint(lit,1,'string')
  } catch (e) { throw 'ParseFixedLiteral '+e }
  //@+others
  //@+node:caminhante.20210905235606.1: *4* ReadFixedLiteral
  // Throws (a string): 'ReadFixedLiteral <reason>' when ReadLiteral rejects its arguments.
  function ReadFixedLiteral (s) {
    var r
    try {
      r = ReadLiteral(lit,s)
    } catch (e) { throw 'ReadFixedLiteral '+e }
    r.numberOfParsers = function () { return 1 }
    r.intermediateParsers = function () { return [] }
    return r
  }
  ReadFixedLiteral.numberOfParsers = function () { return 1 }
  ReadFixedLiteral.canParse = true
  //@-others
  return ReadFixedLiteral
}
ParseFixedLiteral.canParse = true
- //@+node:caminhante.20210905231641.1: *3* ParseSeq
// combines two or more parsers into a new parser that applies them in sequence
//   arguments - the parsers to chain; each must satisfy Type_Parser
// Returns seqCombinedParser, a parser taking a source cursor. It is tagged with `canParse`
// and a `numberOfParsers()` that reports how many parsers were combined.
// Throws (a string): 'ParseSeq <reason>' when an argument fails the Type_Parser constraint;
// the reported argument position is 1-based.
function ParseSeq () {
  var parsers = [].slice.apply(arguments)
  try {
    parsers.forEach( function(p,i){ typeConstraint(p,i+1,Type_Parser) } )
  } catch (e) { throw 'ParseSeq '+e }
  //@+others
  //@+node:caminhante.20210905232810.1: *4* seqCombinedParser
  function numberOfParsers () { return parsers.length }
  // Runs every parser in order, feeding each one the result of the previous one, and
  // stops at the first failure. The sequence succeeds when the last result succeeded.
  //   pos()/line()/column() - taken from the last parser's result;
  //                           they throw 'seqCombinedParser.<name> <value>' on failure
  //   token()               - source text covered by the whole sequence
  //   intermediateParsers() - the results produced by the individual parsers, in order
  function seqCombinedParser (src) {
    try {
      typeConstraint(src,1,Type_SourceCode)
      var source = src.source
      var p = PeekChar(src), lp, ps = [], _success = false, c = 0
      while (p.success() && c < parsers.length) {
        lp = p
        p = parsers[c](p)
        ps.push(p)
        c++
      }
      // on failure fall back to the last good cursor so accessors can report a position;
      // when even the initial peek failed `p` keeps that peek
      if (p.success()) {_success = true} else if (lp) { p = lp }
    } catch (e) { throw 'seqCombinedParser '+e }
    function pos () { if (_success) {return p.pos()} throw 'seqCombinedParser.pos '+p.pos() }
    function line () { if (_success) {return p.line()} throw 'seqCombinedParser.line '+p.line() }
    function column () { if (_success) {return p.column()} throw 'seqCombinedParser.column '+p.column() }
    // p.pos() is the first unread character, except at the end of the source where it
    // stays on the last consumed one; atEnd() compensates for that single position.
    function token () {
      if ( _success ) { return source().slice(src.pos()-1, p.pos()-1+(atEnd() ? 1 : 0)) }
      throw 'seqCombinedParser.token'
    }
    function tokenLength () { if (_success) {return p.pos()-src.pos()+(atEnd() ? 1 : 0)} return 0 }
    function success () { return _success }
    function atEnd () { return last(ps).atEnd() }
    function intermediateParsers () { return ps }
    return {source:source,pos:pos,line:line,column:column,token:token,tokenLength:tokenLength,success:success,atEnd:atEnd,
      numberOfParsers:numberOfParsers,intermediateParsers:intermediateParsers}
  }
  seqCombinedParser.numberOfParsers = numberOfParsers
  seqCombinedParser.canParse = true
  //@-others
  return seqCombinedParser
}
ParseSeq.canParse = true
- //@-others
- //@-leo
|