rst.nim 140 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module implements a `reStructuredText`:idx: (RST) and
  10. ## `Markdown`:idx: parser.
  11. ## User's manual on supported markup syntax and command line usage can be
  12. ## found in [Nim-flavored Markdown and reStructuredText](markdown_rst.html).
  13. ##
  14. ## * See also [Nim DocGen Tools Guide](docgen.html) for handling of
  15. ## ``.nim`` files.
  16. ## * See also [packages/docutils/rstgen module](rstgen.html) to know how to
  17. ## generate HTML or Latex strings (for embedding them into custom documents).
  18. ##
  19. ## Choice between Markdown and RST as well as optional additional features are
  20. ## turned on by passing ``options:`` [RstParseOptions] to [proc rstParse].
  21. import
  22. std/[os, strutils, enumutils, algorithm, lists, sequtils,
  23. tables, strscans]
  24. import dochelpers, rstidx, rstast
  25. import std/private/miscdollars
  26. from highlite import SourceLanguage, getSourceLanguage
  27. when defined(nimPreviewSlimSystem):
  28. import std/[assertions, syncio]
  type
    RstParseOption* = enum    ## switches understood by the RST parser
      roSupportSmilies,       ## recognize smilies such as ``:)``
      roSupportRawDirective,  ## allow the ``raw`` directive (leave it off
                              ## when sandboxing)
      roSupportMarkdown,      ## enable the additional Markdown features
      roPreferMarkdown,       ## parse as Markdown, RST being kept as an
                              ## "extension" to it -- implies
                              ## `roSupportMarkdown`
      roNimFile,              ## set for Nim files, where the default
                              ## interpreted text role should be :nim:
      roSandboxDisabled       ## turns on features (e.g. raw, include,
                              ## importdoc) that are disabled by default
                              ## because they can let users read arbitrary
                              ## data and perform XSS when the parser is
                              ## used in a web app.

    RstParseOptions* = set[RstParseOption]

    MsgClass* = enum          ## severity label printed in front of a message
      mcHint = "Hint",
      mcWarning = "Warning",
      mcError = "Error"

    # keep the order in sync with compiler/docgen.nim and compiler/lineinfos.nim:
    MsgKind* = enum           ## every message the parser can emit; the string
                              ## value is the format template (``$1`` = arg)
      meCannotOpenFile = "cannot open '$1'",
      meExpected = "'$1' expected",
      meMissingClosing = "$1",
      meGridTableNotImplemented = "grid table is not implemented",
      meMarkdownIllformedTable = "illformed delimiter row of a Markdown table",
      meIllformedTable = "Illformed table: $1",
      meNewSectionExpected = "new section expected $1",
      meGeneralParseError = "general parse error",
      meInvalidDirective = "invalid directive: '$1'",
      meInvalidField = "invalid field: $1",
      meFootnoteMismatch = "mismatch in number of footnotes and their refs: $1",
      mwRedefinitionOfLabel = "redefinition of label '$1'",
      mwUnknownSubstitution = "unknown substitution '$1'",
      mwAmbiguousLink = "ambiguous doc link $1",
      mwBrokenLink = "broken link '$1'",
      mwUnsupportedLanguage = "language '$1' not supported",
      mwUnsupportedField = "field '$1' not supported",
      mwRstStyle = "RST style: $1",
      mwUnusedImportdoc = "importdoc for '$1' is not used",
      meSandboxedDirective = "disabled directive: '$1'",

    MsgHandler* = proc (filename: string, line, col: int, msgKind: MsgKind,
                        arg: string) {.closure, gcsafe.}
      ## callback deciding what to do in case of an error
    FindFileHandler* = proc (filename: string): string {.closure, gcsafe.}
    FindRefFileHandler* =
      proc (targetRelPath: string):
           tuple[targetPath: string, linkRelPath: string] {.closure, gcsafe.}
      ## given a relative path, returns where the .html or .idx file should
      ## be found; `linkRelPath` is a prefix to be put in front of a link
      ## anchor coming from that file
  # Forward declarations: only the signatures are given at this point, the
  # bodies have to be supplied later in the module.
  proc rstnodeToRefname*(n: PRstNode): string
  proc addNodes*(n: PRstNode): string
  proc getFieldValue*(n: PRstNode, fieldname: string): string {.gcsafe.}
  proc getArgument*(n: PRstNode): string
  84. # ----------------------------- scanner part --------------------------------
  const
    # characters a word token is made of (ASCII alphanumerics + bytes >= 0x80)
    SymChars: set[char] = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'}
    # every key of `Smilies` begins with one of these characters
    SmileyStartChars: set[char] = {':', ';', '8'}
    # smiley text -> icon name
    Smilies = {
      ":D": "icon_e_biggrin",
      ":-D": "icon_e_biggrin",
      ":)": "icon_e_smile",
      ":-)": "icon_e_smile",
      ";)": "icon_e_wink",
      ";-)": "icon_e_wink",
      ":(": "icon_e_sad",
      ":-(": "icon_e_sad",
      ":o": "icon_e_surprised",
      ":-o": "icon_e_surprised",
      ":shock:": "icon_eek",
      ":?": "icon_e_confused",
      ":-?": "icon_e_confused",
      ":-/": "icon_e_confused",

      "8-)": "icon_cool",

      ":lol:": "icon_lol",
      ":x": "icon_mad",
      ":-x": "icon_mad",
      ":P": "icon_razz",
      ":-P": "icon_razz",
      ":oops:": "icon_redface",
      ":cry:": "icon_cry",
      ":evil:": "icon_evil",
      ":twisted:": "icon_twisted",
      ":roll:": "icon_rolleyes",
      ":!:": "icon_exclaim",

      ":?:": "icon_question",
      ":idea:": "icon_idea",
      ":arrow:": "icon_arrow",
      ":|": "icon_neutral",
      ":-|": "icon_neutral",
      ":mrgreen:": "icon_mrgreen",
      ":geek:": "icon_e_geek",
      ":ugeek:": "icon_e_ugeek"
    }
    # directive names on the sandbox allow-list
    SandboxDirAllowlist = [
      "image", "code", "code-block", "admonition", "attention", "caution",
      "container", "contents", "danger", "default-role", "error", "figure",
      "hint", "important", "index", "note", "role", "tip", "title",
      "warning"]
  type
    TokType = enum
      tkEof,
      tkIndent,
      tkWhite,
      tkWord,
      tkAdornment,  # chapter adornment, transitions and horizontal
                    # table borders
      tkPunct,      # one or many punctuation characters
      tkOther

    Token = object        # one RST token
      kind*: TokType      # which sort of token this is
      ival*: int          # the indentation or parsed integer value
      symbol*: string     # the token text
      line*, col*: int    # position (line and column) of the token

    TokenSeq = seq[Token]

    Lexer = object of RootObj   # scanner state
      buf*: cstring             # text being scanned
      bufpos*: int              # index of the next character in `buf`
      line*, col*, baseIndent*: int
      adornmentLine*: bool
      escapeNext*: bool
  proc getThing(L: var Lexer, tok: var Token, s: set[char]) =
    ## Reads a `tkWord` token starting at the lexer's current position.
    ## The first character is always consumed; following characters are
    ## consumed for as long as they belong to `s`. The consumed text is
    ## appended to `tok.symbol` and `L.col` / `L.bufpos` are advanced past it.
    tok.kind = tkWord
    tok.line = L.line
    tok.col = L.col
    let first = L.bufpos
    var cur = first
    # unconditional first character
    tok.symbol.add L.buf[cur]
    inc cur
    while L.buf[cur] in s:
      tok.symbol.add L.buf[cur]
      inc cur
    L.col += cur - first
    L.bufpos = cur
  proc isCurrentLineAdornment(L: var Lexer): bool =
    ## Tells whether the rest of the current line, judged by its first
    ## character, is an adornment line. The lexer position is not changed.
    let lead = L.buf[L.bufpos]
    let permitted =
      if lead == '+':
        {'-', '=', '+'}                   # grid table
      else:
        {lead, ' ', '\t', '\v', '\f'}     # section adornment or table
                                          # horizontal border
    var i = L.bufpos + 1
    # walk up to the end of the line (or of the buffer)
    while L.buf[i] notin {'\c', '\l', '\0'}:
      if L.buf[i] notin permitted:
        return false
      inc i
    result = true
  proc getPunctAdornment(L: var Lexer, tok: var Token) =
    ## Reads a punctuation token, or an adornment token when the lexer has
    ## flagged the current line as an adornment line.
    ##
    ## * if the previous token requested escaping (`L.escapeNext`), exactly
    ##   one character is taken;
    ## * a backslash outside of an adornment line is taken alone and sets
    ##   `L.escapeNext`;
    ## * otherwise the whole run of identical characters is taken.
    tok.kind = if L.adornmentLine: tkAdornment else: tkPunct
    tok.line = L.line
    tok.col = L.col
    let first = L.bufpos
    let ch = L.buf[first]
    var cur = first
    if L.escapeNext:
      tok.symbol.add ch
      inc cur
    elif ch == '\\' and not L.adornmentLine:
      tok.symbol.add '\\'
      inc cur
      L.escapeNext = true
    else:
      # run of the same character
      tok.symbol.add ch
      inc cur
      while L.buf[cur] == ch:
        tok.symbol.add ch
        inc cur
    L.col += cur - first
    L.bufpos = cur
    # nim extension: standalone \ can not be adornment
    if tok.symbol == "\\":
      tok.kind = tkPunct
  proc getBracket(L: var Lexer, tok: var Token) =
    ## Reads the single character at the lexer position as a `tkPunct`
    ## token and steps the lexer one character forward.
    tok.line = L.line
    tok.col = L.col
    tok.kind = tkPunct
    tok.symbol.add L.buf[L.bufpos]
    L.bufpos += 1
    L.col += 1
  proc getIndentAux(L: var Lexer, start: int): int =
    ## Measures the indentation of the line that follows the newline found
    ## at `start`; a tab moves to the next multiple of 8. Yields 0 when the
    ## buffer ends right after the whitespace; when the line holds nothing
    ## but whitespace, the indentation of the following line is used.
    ## `L.bufpos` is left just after the whitespace of the first line
    ## examined by this call.
    var cur = start
    # skip the newline (but include it in the token!)
    case L.buf[cur]
    of '\r':
      cur += (if L.buf[cur + 1] == '\n': 2 else: 1)
    of '\n':
      cur += 1
    else:
      discard
    var width = 0
    while L.buf[cur] in {' ', '\v', '\f', '\t'}:
      if L.buf[cur] == '\t':
        width = width - (width mod 8) + 8
      else:
        inc width
      inc cur
    # any other character (EndOfFile included) ends the loop
    case L.buf[cur]
    of '\0':
      result = 0
    of '\n', '\r':
      # look at the next line for proper indentation:
      result = getIndentAux(L, cur)
    else:
      result = width
    L.bufpos = cur              # no need to set back buf
  proc getIndent(L: var Lexer, tok: var Token) =
    ## Produces a `tkIndent` token for the newline at the lexer position.
    ## `tok.ival` is the measured indentation less `L.baseIndent` (never
    ## negative) and `tok.symbol` is a newline followed by that many spaces.
    ## `L.line` is incremented and `L.col` is set to the measured (not
    ## base-adjusted) indentation.
    let measured = getIndentAux(L, L.bufpos)  # also skips the newline
    inc L.line
    L.col = measured
    tok.kind = tkIndent
    tok.line = L.line
    tok.col = 0
    tok.ival = max(measured - L.baseIndent, 0)
    tok.symbol = "\n" & spaces(tok.ival)
  proc rawGetTok(L: var Lexer, tok: var Token) =
    ## Reads the next token into `tok`, choosing the scanning routine from
    ## the character at the lexer position. On return `tok.col` has
    ## `L.baseIndent` subtracted from it (clamped at 0).
    const
      WordStart = {'a'..'z', 'A'..'Z', '\x80'..'\xFF', '0'..'9'}
      Blank = {' ', '\t', '\v', '\f'}
      Newline = {'\r', '\n'}
      PunctStart = {'!', '\"', '#', '$', '%', '&', '\'', '*', '+', ',', '-',
                    '.', '/', ':', ';', '<', '=', '>', '?', '@', '\\', '^',
                    '_', '`', '|', '~'}
      Bracket = {'(', ')', '[', ']', '{', '}'}
    tok.symbol = ""
    tok.ival = 0
    if L.col == 0:
      L.adornmentLine = false
    let ch = L.buf[L.bufpos]
    if ch in WordStart:
      getThing(L, tok, SymChars)
    elif ch in Blank:
      getThing(L, tok, {' ', '\t'})
      tok.kind = tkWhite
      if L.buf[L.bufpos] in Newline:
        rawGetTok(L, tok)       # ignore spaces before \n
    elif ch in Newline:
      getIndent(L, tok)
      L.adornmentLine = false
    elif ch in PunctStart:
      # adornment detection is done only at the very start of a line
      if L.col == 0:
        L.adornmentLine = isCurrentLineAdornment(L)
      getPunctAdornment(L, tok)
    elif ch in Bracket:
      getBracket(L, tok)
    else:
      tok.line = L.line
      tok.col = L.col
      if ch == '\0':
        tok.kind = tkEof        # nothing is consumed at the end of buffer
      else:
        tok.kind = tkOther
        tok.symbol.add ch
        inc L.bufpos
        inc L.col
    tok.col = max(tok.col - L.baseIndent, 0)
  proc getTokens(buffer: string, tokens: var TokenSeq) =
    ## Tokenizes `buffer`, appending the tokens to `tokens`; the last token
    ## appended is always `tkEof`.
    var lex: Lexer
    lex.buf = cstring(buffer)
    lex.line = 0
    # skip UTF-8 BOM
    if lex.buf[0] == '\xEF' and lex.buf[1] == '\xBB' and lex.buf[2] == '\xBF':
      lex.bufpos += 3
    while true:
      tokens.setLen(tokens.len + 1)
      # an escape request is valid for exactly one following token
      let wasEscaping = lex.escapeNext
      rawGetTok(lex, tokens[^1])
      if wasEscaping:
        lex.escapeNext = false
      if tokens[^1].kind == tkEof:
        break
    if tokens[0].kind == tkWhite:
      # BUGFIX
      # leading whitespace of the first token is turned into an indent token
      tokens[0].ival = tokens[0].symbol.len
      tokens[0].kind = tkIndent
  type
    LevelInfo = object
      symbol: char          # adornment character
      hasOverline: bool     # is there an overline too (besides underline)?
      line: int             # last line where this style occurred
                            # (for error message)
      hasPeers: bool        # are there headings on the same hierarchy level?

    LiteralBlockKind = enum   # RST-style literal blocks after `::`
      lbNone,
      lbIndentedLiteralBlock,
      lbQuotedLiteralBlock

    LevelMap = seq[LevelInfo]   # Saves for each possible title adornment
                                # style its level in the current document.

    SubstitutionKind = enum
      rstSubstitution = "substitution",
      hyperlinkAlias = "hyperlink alias",
      implicitHyperlinkAlias = "implicitly-generated hyperlink alias"

    Substitution = object
      kind*: SubstitutionKind
      key*: string
      value*: PRstNode
      info*: TLineInfo      # place where the substitution was defined

    AnchorRule = enum
      arInternalRst,  ## For automatically generated RST anchors (from
                      ## headings, footnotes, inline internal targets):
                      ## case-insensitive, 1-space-significant (by RST spec)
      arExternalRst,  ## For external .nim doc comments or .rst/.md
      arNim,          ## For anchors generated by ``docgen.nim``: Nim-style
                      ## case sensitivity, etc. (see
                      ## `proc normalizeNimName`_ for details)
      arHyperlink     ## For links with manually set anchors in
                      ## form `text <pagename.html#anchor>`_

    RstAnchorKind = enum
      manualDirectiveAnchor = "manual directive anchor",
      manualInlineAnchor = "manual inline anchor",
      footnoteAnchor = "footnote anchor",
      headlineAnchor = "implicitly-generated headline anchor"

    # NOTE(review): the listing this was taken from had lost its indentation;
    # `refname` and `external` are placed inside the `arNim` branch here --
    # confirm against the upstream file.
    AnchorSubst = object
      info: TLineInfo         # the file where the anchor was defined
      priority: int
      case kind: range[arInternalRst .. arNim]
      of arInternalRst:
        anchorType: RstAnchorKind
        target: PRstNode
      of arExternalRst:
        anchorTypeExt: RstAnchorKind
        refnameExt: string
      of arNim:
        module: FileIndex     # anchor's module (generally not the same as
                              # file)
        tooltip: string       # displayed tooltip for Nim-generated anchors
        langSym: LangSymbol
        refname: string       # A reference name that will be inserted
                              # directly into HTML/Latex.
        external: bool

    AnchorSubstTable = Table[string, seq[AnchorSubst]]
      # use `seq` to account for duplicate anchors

    FootnoteType = enum
      fnManualNumber,     # manually numbered footnote like [3]
      fnAutoNumber,       # auto-numbered footnote [#]
      fnAutoNumberLabel,  # auto-numbered with label [#label]
      fnAutoSymbol,       # auto-symbol footnote [*]
      fnCitation          # simple text label like [citation2021]

    FootnoteSubst = tuple
      kind: FootnoteType  # discriminator
      number: int         # valid for fnManualNumber (always) and
                          # fnAutoNumber, fnAutoNumberLabel after
                          # resolveSubs is called
      autoNumIdx: int     # order of occurrence: fnAutoNumber,
                          # fnAutoNumberLabel
      autoSymIdx: int     # order of occurrence: fnAutoSymbol
      label: string       # valid for fnAutoNumberLabel

    RstFileTable* = object
      filenameToIdx*: Table[string, FileIndex]
      idxToFilename*: seq[string]

    ImportdocInfo = object
      used: bool            # was this import used?
      fromInfo: TLineInfo   # place of `.. importdoc::` directive
      idxPath: string       # full path to ``.idx`` file
      linkRelPath: string   # prefix before target anchor
      title: string         # document title obtained from ``.idx``

    RstSharedState = object
      options*: RstParseOptions   # parsing options
      hLevels: LevelMap           # hierarchy of heading styles
      hTitleCnt: int              # =0 if no title, =1 if only main title,
                                  # =2 if both title and subtitle are present
      hCurLevel: int              # current section level
      currRole: string            # current interpreted text role
      currRoleKind: RstNodeKind   # ... and its node kind
      subs: seq[Substitution]     # substitutions
      refs*: seq[Substitution]    # references
      anchors*: AnchorSubstTable
                                  # internal target substitutions
      lineFootnoteNum: seq[TLineInfo]     # footnote line, auto numbers .. [#]
      lineFootnoteNumRef: seq[TLineInfo]  # footnote line, their reference [#]_
      currFootnoteNumRef: int             # ... their counter for `resolveSubs`
      lineFootnoteSym: seq[TLineInfo]     # footnote line, auto symbols .. [*]
      lineFootnoteSymRef: seq[TLineInfo]  # footnote line, their reference [*]_
      currFootnoteSymRef: int             # ... their counter for `resolveSubs`
      footnotes: seq[FootnoteSubst]   # correspondence b/w footnote label,
                                      # number, order of occurrence
      msgHandler: MsgHandler      # How to handle errors.
      findFile: FindFileHandler   # How to find files for include.
      findRefFile: FindRefFileHandler
                                  # How to find files imported by importdoc.
      filenames*: RstFileTable    # map file name <-> FileIndex (for storing
                                  # file names for warnings after 1st stage)
      currFileIdx*: FileIndex     # current index in `filenames`
      tocPart*: seq[PRstNode]     # all the headings of a document
      hasToc*: bool
      idxImports*: Table[string, ImportdocInfo]
                                  # map `importdoc`ed filename -> its info
      nimFileImported*: bool      # Was any ``.nim`` module `importdoc`ed ?

    PRstSharedState* = ref RstSharedState

    ManualAnchor = object
      alias: string     # a (short) name that can substitute the `anchor`
      anchor: string    # anchor = id = refname
      info: TLineInfo

    RstParser = object of RootObj
      idx*: int
      tok*: TokenSeq
      s*: PRstSharedState
      indentStack*: seq[int]
      line*, col*: int      ## initial line/column of whole text or
                            ## documentation fragment that will be added
                            ## in case of error/warning reporting to
                            ## (relative) line/column of the token.
      curAnchors*: seq[ManualAnchor]
                            ## seq to accumulate aliases for anchors:
                            ## because RST can have >1 alias per 1 anchor

    EParseError* = object of ValueError

    SectionParser = proc (p: var RstParser): PRstNode {.nimcall, gcsafe.}
  const
    LineRstInit* = 1    ## Line number that standalone RST text starts at
    ColRstInit* = 0     ## Column number that standalone RST text starts at
                        ## (Nim global reporting adds ColOffset=1)
    ColRstOffset* = 1   ## 1: a replica of ColOffset for internal use
  template currentTok(p: RstParser): Token =
    ## The token at the parser's current index.
    p.tok[p.idx]

  template prevTok(p: RstParser): Token =
    ## The token just before the parser's current index.
    p.tok[p.idx - 1]

  template nextTok(p: RstParser): Token =
    ## The token just after the parser's current index.
    p.tok[p.idx + 1]
  proc whichMsgClass*(k: MsgKind): MsgClass =
    ## returns which message class `k` belongs to.
    ##
    ## The class is derived from the 2nd character of the enum symbol name:
    ## `e`/`E` - error, `w`/`W` - warning, `h`/`H` - hint.
    let classChar = k.symbolName[1]
    if classChar in {'e', 'E'}:
      result = mcError
    elif classChar in {'w', 'W'}:
      result = mcWarning
    elif classChar in {'h', 'H'}:
      result = mcHint
    else:
      assert false, "msgkind does not fit naming scheme"
  proc defaultMsgHandler*(filename: string, line, col: int, msgkind: MsgKind,
                          arg: string) =
    ## Default message handler.
    ##
    ## Formats the location (column shifted by `ColRstOffset`), the message
    ## class and the message text (`$msgkind` with `arg` substituted).
    ## Errors are raised as `EParseError`; anything else goes to `stdout`.
    let msgClass = whichMsgClass(msgkind)
    let text = $msgkind % arg
    var report: string
    toLocation(report, filename, line, col + ColRstOffset)
    report.add(" $1: $2" % [$msgClass, text])
    if msgClass != mcError:
      writeLine(stdout, report)
    else:
      raise newException(EParseError, report)
  proc defaultFindFile*(filename: string): string =
    ## Default file finder: returns `filename` itself when such a file
    ## exists and an empty string otherwise.
    result = ""
    if fileExists(filename):
      result = filename
  proc defaultFindRefFile*(filename: string): (string, string) =
    ## Default finder for `importdoc`ed files: returns `filename` unchanged
    ## together with an empty second component.
    result = (filename, "")
  proc defaultRole(options: RstParseOptions): string =
    ## Name of the initial default role: "nim" when `roNimFile` is set,
    ## "literal" otherwise.
    result = "literal"
    if roNimFile in options:
      result = "nim"
  proc whichRoleAux(sym: string): RstNodeKind =
    ## Maps the role name `sym` (compared in lower case) to a node kind.
    ## Names that are not built-in roles are tried as source language names;
    ## if that fails too the result is `rnUnknownRole`.
    let role = toLowerAscii(sym)
    case role
    of "idx":
      result = rnIdx
    of "literal", "code":
      # literal and code are the same in our implementation
      result = rnInlineLiteral
    of "strong":
      result = rnStrongEmphasis
    of "emphasis":
      result = rnEmphasis
    of "sub", "subscript":
      result = rnSub
    of "sup", "superscript":
      result = rnSup
    of "program", "option", "tok":
      result = rnCodeFragment
    else:
      # c++ currently can be spelled only as cpp, c# only as csharp
      if getSourceLanguage(role) != langNone:
        result = rnInlineCode
      else:  # unknown role
        result = rnUnknownRole
  proc len(filenames: RstFileTable): int =
    ## Number of file names stored in `filenames`.
    result = len(filenames.idxToFilename)
  proc addFilename*(s: PRstSharedState, file1: string): FileIndex =
    ## Returns index of filename, adding it if it has not been used before
    let freshIdx = FileIndex(s.filenames.len)
    # `freshIdx` doubles as the "not found" marker of the lookup
    result = s.filenames.filenameToIdx.getOrDefault(file1, freshIdx)
    if result == freshIdx:
      # register the new name in both directions of the mapping
      s.filenames.filenameToIdx[file1] = result
      s.filenames.idxToFilename.add(file1)
  proc setCurrFilename*(s: PRstSharedState, file1: string) =
    ## Makes `file1` the current file of `s`, registering it via
    ## `addFilename` when it is not known yet.
    let idx = s.addFilename(file1)
    s.currFileIdx = idx
  proc getFilename(filenames: RstFileTable, fid: FileIndex): string =
    ## Returns the file name stored under index `fid`.
    ## Fails with an assertion (also in release builds) when `fid` is not
    ## a valid index of `filenames`.
    doAssert(0 <= fid.int and fid.int < filenames.len,
             "incorrect FileIndex $1 (range 0..$2)" % [
             $fid.int, $(filenames.len - 1)])
    filenames.idxToFilename[fid.int]
  proc getFilename(s: PRstSharedState, subst: AnchorSubst): string =
    ## File name looked up by the file index of `subst.info`.
    result = s.filenames.getFilename(subst.info.fileIndex)

  proc getModule(s: PRstSharedState, subst: AnchorSubst): string =
    ## File name looked up by `subst.module`.
    s.filenames.getFilename(subst.module)

  proc currFilename(s: PRstSharedState): string =
    ## Name of the file that `s.currFileIdx` points to.
    result = s.filenames.getFilename(s.currFileIdx)
  proc newRstSharedState*(options: RstParseOptions,
                          filename: string,
                          findFile: FindFileHandler,
                          findRefFile: FindRefFileHandler,
                          msgHandler: MsgHandler,
                          hasToc: bool): PRstSharedState =
    ## Creates the shared state and makes `filename` its current file.
    ##
    ## Every handler passed as `nil` is replaced by the matching `default*`
    ## proc of this module. The initial role is `defaultRole(options)`.
    let
      role = defaultRole(options)
      roleKind = whichRoleAux(role)
    result = PRstSharedState(
        currRole: role,
        currRoleKind: roleKind,
        options: options,
        msgHandler:
          if not isNil(msgHandler): msgHandler
          else: defaultMsgHandler,
        findFile:
          if not isNil(findFile): findFile
          else: defaultFindFile,
        findRefFile:
          if not isNil(findRefFile): findRefFile
          else: defaultFindRefFile,
        hasToc: hasToc
    )
    result.setCurrFilename(filename)
  proc curLine(p: RstParser): int =
    ## Line of the current token shifted by the parser's initial line.
    result = p.line + currentTok(p).line
  proc findRelativeFile(p: RstParser; filename: string): string =
    ## Looks for `filename` in the directory of the current file first;
    ## if it is not there the `findFile` handler is asked instead.
    let besideCurrent = p.s.currFilename.splitFile.dir / filename
    if fileExists(besideCurrent):
      result = besideCurrent
    else:
      result = p.s.findFile(filename)
  proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string) =
    ## Reports `msgKind` with argument `arg` at the position of the
    ## current token (shifted by the parser's initial line/column).
    let
      file = p.s.currFilename
      line = curLine(p)
      col = p.col + currentTok(p).col
    p.s.msgHandler(file, line, col, msgKind, arg)

  proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string) =
    ## Reports a message for the current file at the initial RST position.
    let file = s.currFilename
    s.msgHandler(file, LineRstInit, ColRstInit, msgKind, arg)

  proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string;
                  line, col: int) =
    ## Reports a message for the current file at the given `line`/`col`.
    let file = s.currFilename
    s.msgHandler(file, line, col, msgKind, arg)

  proc rstMessage(s: PRstSharedState, filename: string, msgKind: MsgKind,
                  arg: string) =
    ## Reports a message for `filename` at the initial RST position.
    s.msgHandler(filename, LineRstInit, ColRstInit, msgKind, arg)

  proc rstMessage*(filenames: RstFileTable, f: MsgHandler,
                   info: TLineInfo, msgKind: MsgKind, arg: string) =
    ## Print warnings using `info`, i.e. in 2nd-pass warnings for
    ## footnotes/substitutions/references or from ``rstgen.nim``.
    let file = getFilename(filenames, info.fileIndex)
    f(file, int(info.line), int(info.col), msgKind, arg)

  proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string, line, col: int) =
    ## Reports a message at `line`/`col`, both taken relative to the
    ## parser's initial line/column.
    let file = p.s.currFilename
    p.s.msgHandler(file, p.line + line, p.col + col, msgKind, arg)

  proc rstMessage(p: RstParser, msgKind: MsgKind) =
    ## Reports `msgKind` at the current token, with the token's symbol
    ## as the message argument.
    let
      file = p.s.currFilename
      line = curLine(p)
      col = p.col + currentTok(p).col
    p.s.msgHandler(file, line, col, msgKind, currentTok(p).symbol)
  # Functions `isPureRst` & `stopOrWarn` address differences between
  # Markdown and RST:
  # * Markdown always tries to continue working. If it is really impossible
  #   to parse a markup element, its proc just returns `nil` and parsing
  #   continues for it as for normal text paragraph.
  #   The downside is that real mistakes/typos are often silently ignored.
  #   The same applies to legacy `RstMarkdown` mode for nimforum.
  # * RST really signals errors. The downside is that it's more intrusive -
  #   the user must escape special syntax with \ explicitly.
  #
  # TODO: we need to apply this strategy to all markup elements eventually.

  func isPureRst(p: RstParser): bool =
    ## True when the `roSupportMarkdown` option is off.
    result = roSupportMarkdown notin p.s.options

  func isRst(p: RstParser): bool =
    ## True when the `roPreferMarkdown` option is off.
    result = roPreferMarkdown notin p.s.options

  func isMd(p: RstParser): bool =
    ## True when the `roPreferMarkdown` option is on.
    result = roPreferMarkdown in p.s.options

  func isMd(s: PRstSharedState): bool =
    ## True when the `roPreferMarkdown` option is on.
    result = roPreferMarkdown in s.options
  proc stopOrWarn(p: RstParser, errorType: MsgKind, arg: string) =
    ## Reports `errorType` in pure RST mode and `mwRstStyle` otherwise,
    ## at the position of the current token.
    var realMsgKind = mwRstStyle
    if isPureRst(p):
      realMsgKind = errorType
    rstMessage(p, realMsgKind, arg)

  proc stopOrWarn(p: RstParser, errorType: MsgKind, arg: string, line, col: int) =
    ## Same as above but reports at the given (relative) `line`/`col`.
    var realMsgKind = mwRstStyle
    if isPureRst(p):
      realMsgKind = errorType
    rstMessage(p, realMsgKind, arg, line, col)
  proc currInd(p: RstParser): int =
    ## The indentation on top of the indentation stack.
    p.indentStack[^1]

  proc pushInd(p: var RstParser, ind: int) =
    ## Pushes `ind` onto the indentation stack.
    p.indentStack.add(ind)

  proc popInd(p: var RstParser) =
    ## Drops the top of the indentation stack; the last remaining
    ## element is never removed.
    if p.indentStack.len <= 1:
      return
    p.indentStack.setLen(p.indentStack.len - 1)
  # Working with indentation in rst.nim
  # -----------------------------------
  #
  # Every line break has an associated tkIndent.
  # The tokenizer writes back the first column of next non-blank line
  # in all preceding tkIndent tokens to the `ival` field of tkIndent.
  #
  # RST document is separated into body elements (B.E.), every of which
  # has a dedicated handler proc (or block of logic when B.E. is a block quote)
  # that should follow the next rule:
  #   Every B.E. handler proc should finish at tkIndent (newline)
  #   after its B.E. finishes.
  #   Then its callers (which is `parseSection` or another B.E. handler)
  #   check for tkIndent ival (without necessity to advance `p.idx`)
  #   and decide themselves whether they continue processing or also stop.
  #
  # An example::
  #
  #   L    RST text fragment                  indentation
  #     +--------------------+
  #   1 |                    | <- (empty line at the start of file) no tokens
  #   2 |First paragraph.    | <- tkIndent has ival=0, and next tkWord has col=0
  #   3 |                    | <- tkIndent has ival=0
  #   4 |* bullet item and   | <- tkIndent has ival=0, and next tkPunct has col=0
  #   5 |  its continuation  | <- tkIndent has ival=2, and next tkWord has col=2
  #   6 |                    | <- tkIndent has ival=4
  #   7 |    Block quote     | <- tkIndent has ival=4, and next tkWord has col=4
  #   8 |                    | <- tkIndent has ival=0
  #   9 |                    | <- tkIndent has ival=0
  #   10|Final paragraph     | <- tkIndent has ival=0, and tkWord has col=0
  #     +--------------------+
  #    C:01234
  #
  # Here parser starts with initial `indentStack=[0]` and then calls the
  # 1st `parseSection`:
  #
  #   - `parseSection` calls `parseParagraph` and "First paragraph" is parsed
  #   - bullet list handler is started at reaching ``*`` (L4 C0), it
  #     starts bullet item logic (L4 C2), which calls `pushInd(p, ind=2)`,
  #     then calls `parseSection` (2nd call, nested) which parses
  #     paragraph "bullet list and its continuation" and then starts
  #     a block quote logic (L7 C4).
  #     The block quote logic calls `pushInd(p, ind=4)` and
  #     calls `parseSection` again, so a (simplified) sequence of calls now is::
  #
  #       parseSection -> parseBulletList ->
  #         parseSection (+block quote logic) -> parseSection
  #
  #     3rd `parseSection` finishes, block quote logic calls `popInd(p)`,
  #     it returns to bullet item logic, which sees that next tkIndent has
  #     ival=0 and stops there since the required indentation for a bullet item
  #     is 2 and 0<2; the bullet item logic calls `popInd(p)`.
  #     Then bullet list handler checks that next tkWord (L10 C0) has the
  #     right indentation but does not have ``*`` so stops at tkIndent (L10).
  #   - 1st `parseSection` invocation calls `parseParagraph` and the
  #     "Final paragraph" is parsed.
  #
  # If a B.E. handler has advanced `p.idx` past tkIndent to check
  # whether it should continue its processing or not, and decided not to,
  # then this B.E. handler should step back (e.g. do `dec p.idx`).
  proc initParser(p: var RstParser, sharedState: PRstSharedState) =
    ## Resets `p` to its starting state: no tokens, index 0, indentation
    ## stack `[0]`, initial RST line/column, and attaches `sharedState`.
    p.s = sharedState
    p.tok = @[]
    p.idx = 0
    p.indentStack = @[0]
    p.line = LineRstInit
    p.col = ColRstInit
  proc addNodesAux(n: PRstNode, result: var string) =
    ## Appends the text of every leaf below `n` to `result`, visiting the
    ## sons in order. A `nil` node contributes nothing.
    if n.isNil:
      return
    case n.kind
    of rnLeaf:
      result.add(n.text)
    else:
      for i in 0 ..< n.len:
        addNodesAux(n.sons[i], result)

  proc addNodes(n: PRstNode): string =
    ## Concatenated text of all leaves below `n`.
    addNodesAux(n, result)
  proc linkName(n: PRstNode): string =
    ## Returns a normalized reference name, see:
    ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names
    ##
    ## Here normalization is: text of all leaves, converted to lower case.
    result = toLowerAscii(addNodes(n))
  proc rstnodeToRefnameAux(n: PRstNode, r: var string, b: var bool) =
    ## Appends to `r` the reference-name form of the text below `n`.
    ##
    ## * letters are lower-cased, bytes >= 128 are copied as is;
    ## * a digit that would start the name is prefixed with 'Z';
    ## * selected punctuation is spelled out ("dollar", "plus", ...);
    ## * any other character only marks a pending separator in `b`, which
    ##   is emitted as a single '-' before the next appended piece
    ##   (and never at the very beginning of `r`).
    template flushSeparator() =
      # emit the pending '-' (if any) before appending something
      if b:
        r.add('-')
        b = false

    template special(s) =
      flushSeparator()
      r.add(s)

    if n == nil: return
    if n.kind != rnLeaf:
      for i in 0 ..< n.len: rstnodeToRefnameAux(n.sons[i], r, b)
      return
    for ch in n.text:
      case ch
      of '0'..'9':
        flushSeparator()
        if r.len == 0: r.add('Z')
        r.add(ch)
      of 'a'..'z', '\128'..'\255':
        flushSeparator()
        r.add(ch)
      of 'A'..'Z':
        flushSeparator()
        r.add(chr(ord(ch) - ord('A') + ord('a')))
      of '$': special "dollar"
      of '%': special "percent"
      of '&': special "amp"
      of '^': special "roof"
      of '!': special "emark"
      of '?': special "qmark"
      of '*': special "star"
      of '+': special "plus"
      of '-': special "minus"
      of '/': special "slash"
      of '\\': special "backslash"
      of '=': special "eq"
      of '<': special "lt"
      of '>': special "gt"
      of '~': special "tilde"
      of ':': special "colon"
      of '.': special "dot"
      of '@': special "at"
      of '|': special "bar"
      else:
        if r.len > 0: b = true
  proc rstnodeToRefname(n: PRstNode): string =
    ## Reference-name form of the text below `n`
    ## (see `rstnodeToRefnameAux` for the conversion itself).
    var pendingSeparator = false
    rstnodeToRefnameAux(n, result, pendingSeparator)
  proc findSub(s: PRstSharedState, n: PRstNode): int =
    ## Index in `s.subs` of the substitution whose key is the text of `n`,
    ## or -1 when there is none.
    let key = addNodes(n)
    result = -1
    # the spec says: if no exact match, try one without case distinction:
    for i in 0 .. high(s.subs):
      if s.subs[i].key == key:
        return i
    for i in 0 .. high(s.subs):
      if cmpIgnoreStyle(key, s.subs[i].key) == 0:
        return i
  proc lineInfo(p: RstParser, iTok: int): TLineInfo =
    ## Location of token number `iTok`: its line/column shifted by the
    ## parser's initial line/column, in the current file.
    result.fileIndex = p.s.currFileIdx
    result.line = uint16(p.line + p.tok[iTok].line)
    result.col = int16(p.col + p.tok[iTok].col)

  proc lineInfo(p: RstParser): TLineInfo =
    ## Location of the current token.
    result = lineInfo(p, p.idx)

  # TODO: we need this simplification because we don't preserve exact starting
  # token of currently parsed element:
  proc prevLineInfo(p: RstParser): TLineInfo =
    ## Location of the token before the current one.
    result = lineInfo(p, p.idx - 1)
  proc setSub(p: var RstParser, key: string, value: PRstNode) =
    ## Stores substitution `key` -> `value`. An existing entry with the same
    ## key gets its value replaced; otherwise a new entry is appended with
    ## the location of the previous token.
    for sub in p.s.subs.mitems:
      if sub.key == key:
        sub.value = value
        return
    p.s.subs.add(Substitution(key: key, value: value, info: prevLineInfo(p)))
  proc setRef(p: var RstParser, key: string, value: PRstNode,
              refType: SubstitutionKind) =
    ## Stores reference `key` -> `value` of kind `refType`.
    ##
    ## If `key` is already present its value is replaced; a
    ## `mwRedefinitionOfLabel` message is issued when the text of the old
    ## and the new value differ. Otherwise a new entry is appended with the
    ## location of the previous token.
    for existing in p.s.refs.mitems:
      if existing.key != key:
        continue
      if existing.value.addNodes != value.addNodes:
        rstMessage(p, mwRedefinitionOfLabel, key)
      existing.value = value
      return
    p.s.refs.add(Substitution(kind: refType, key: key, value: value,
                              info: prevLineInfo(p)))
  proc findRef(s: PRstSharedState, key: string): seq[Substitution] =
    ## All entries of `s.refs` whose key equals `key`, in stored order.
    for i in 0 .. high(s.refs):
      if s.refs[i].key == key:
        result.add(s.refs[i])
  # Ambiguity in links: we don't follow procedure of removing implicit targets
  # defined in https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#implicit-hyperlink-targets
  # Instead we just give explicit links a higher priority than to implicit ones
  # and report ambiguities as warnings. Hopefully it is easy to remove
  # ambiguities manually. Nim auto-generated links from ``docgen.nim``
  # have lowest priority: 1 (for procs) and below for other symbol types.

  proc refPriority(k: SubstitutionKind): int =
    ## Priority of a reference of kind `k` (bigger wins).
    case k
    of rstSubstitution: 8
    of hyperlinkAlias: 7
    of implicitHyperlinkAlias: 2

  proc internalRefPriority(k: RstAnchorKind): int =
    ## Priority of an anchor of kind `k` (bigger wins).
    case k
    of manualDirectiveAnchor: 6
    of manualInlineAnchor: 5
    of footnoteAnchor: 4
    of headlineAnchor: 3
  proc `$`(subst: AnchorSubst): string =  # for debug
    ## Short textual form: kind, priority and a kind-specific detail.
    var detail: string
    case subst.kind
    of arInternalRst: detail = "type=" & $subst.anchorType
    of arExternalRst: detail = "type=" & $subst.anchorTypeExt
    of arNim: detail = "langsym=" & $subst.langSym
    result = "(kind=$1, priority=$2, $3)" % [
        $subst.kind, $subst.priority, detail]
  proc addAnchorRst(p: var RstParser, name: string, target: PRstNode,
                    anchorType: RstAnchorKind) =
    ## Associates node `target` (which has field `anchor`) with an
    ## alias `name` and updates the corresponding aliases in `p.curAnchors`.
    ##
    ## Every pending alias is registered as a `manualDirectiveAnchor`
    ## pointing to `target`; a non-empty `name` is registered with
    ## `anchorType`. The pending aliases are cleared afterwards.
    let prio = internalRefPriority(anchorType)
    for pending in p.curAnchors:
      let entry = AnchorSubst(kind: arInternalRst, target: target,
                              priority: prio, info: pending.info,
                              anchorType: manualDirectiveAnchor)
      p.s.anchors.mgetOrPut(pending.alias, newSeq[AnchorSubst]()).add(entry)
    if name.len > 0:
      let entry = AnchorSubst(kind: arInternalRst, target: target,
                              priority: prio, info: prevLineInfo(p),
                              anchorType: anchorType)
      p.s.anchors.mgetOrPut(name, newSeq[AnchorSubst]()).add(entry)
    p.curAnchors.setLen(0)
  proc addAnchorExtRst(s: var PRstSharedState, key: string, refn: string,
                       anchorType: RstAnchorKind, info: TLineInfo) =
    ## Registers an `arExternalRst` anchor with reference name `refn` under
    ## the lower-cased `key`.
    let entry = AnchorSubst(kind: arExternalRst, refnameExt: refn,
                            priority: internalRefPriority(anchorType),
                            info: info,
                            anchorTypeExt: anchorType)
    s.anchors.mgetOrPut(toLowerAscii(key), newSeq[AnchorSubst]()).add(entry)
  proc addAnchorNim*(s: var PRstSharedState, external: bool, refn: string, tooltip: string,
                     langSym: LangSymbol, priority: int,
                     info: TLineInfo, module: FileIndex) =
    ## Adds an anchor `refn`, which follows
    ## the rule `arNim` (i.e. a symbol in ``*.nim`` file)
    ##
    ## The anchor is registered under `langSym.name`. `module` is stored in
    ## the anchor as well: `getModule` reads it back and
    ## `findMainAnchorNim` groups the found symbols by it, so leaving it
    ## unset would attribute every symbol to file index 0.
    s.anchors.mgetOrPut(langSym.name, newSeq[AnchorSubst]()).add(
        AnchorSubst(kind: arNim, external: external, refname: refn, langSym: langSym,
                    tooltip: tooltip, priority: priority,
                    info: info, module: module))
  proc findMainAnchorNim(s: PRstSharedState, signature: PRstNode,
                         info: TLineInfo):
                        seq[AnchorSubst] =
    ## Finds Nim anchors that match `signature`.
    ##
    ## Returns nothing when `signature` can not be parsed as a Nim symbol or
    ## when no anchor is registered under its name. Matching anchors are
    ## grouped by (symbol kind, module); for a group with overloads either
    ## all concrete symbols are returned (parameters were given in the
    ## signature) or the single "group" anchor (no parameters given).
    var langSym: LangSymbol
    try:
      langSym = toLangSymbol(signature)
    except ValueError:  # parsing failed, not a Nim symbol
      return
    let substitutions = s.anchors.getOrDefault(langSym.name,
                                               newSeq[AnchorSubst]())
    if substitutions.len == 0:
      return
    # logic to select only groups instead of concrete symbols
    # with overloads, note that the same symbol can be defined
    # in multiple modules and `importdoc`ed:
    type GroupKey = tuple[symKind: string, origModule: string]
    # map (symKind, file) (like "proc", "os.nim")  ->  found symbols/groups:
    var found: Table[GroupKey, seq[AnchorSubst]]
    for subst in substitutions:
      if subst.kind == arNim and match(subst.langSym, langSym):
        let key: GroupKey = (subst.langSym.symKind, getModule(s, subst))
        found.mgetOrPut(key, newSeq[AnchorSubst]()).add subst
    for sList in found.values:
      if sList.len == 1:
        result.add sList[0]
      elif langSym.parametersProvided:
        # > 1, there are overloads, potential ambiguity in this `symKind`;
        # there are non-group signatures, select only them
        for cand in sList:
          if not cand.langSym.isGroup:
            result.add cand
      else:
        # when there are many overloads a link like foo_ points to all
        # of them, so selecting the group
        var foundGroup = false
        for cand in sList:
          if cand.langSym.isGroup:
            result.add cand
            foundGroup = true
            break
        doAssert(foundGroup,
                 "docgen has not generated the group for $1 (file $2)" % [
                 langSym.name, getModule(s, sList[0]) ])
  proc findMainAnchorRst(s: PRstSharedState, linkText: string, info: TLineInfo):
                        seq[AnchorSubst] =
    ## All RST anchors (internal and external) registered under the
    ## lower-cased `linkText`; Nim anchors are left out.
    let name = toLowerAscii(linkText)
    for cand in s.anchors.getOrDefault(name, newSeq[AnchorSubst]()):
      if cand.kind in {arInternalRst, arExternalRst}:
        result.add cand
  proc addFootnoteNumManual(p: var RstParser, num: int) =
    ## add manually-numbered footnote
    ##
    ## A footnote with the same number must not exist yet: in that case
    ## `mwRedefinitionOfLabel` is reported and nothing is added.
    for fnote in p.s.footnotes:
      if fnote.number != num:
        continue
      rstMessage(p, mwRedefinitionOfLabel, $num)
      return
    p.s.footnotes.add((fnManualNumber, num, -1, -1, $num))
  proc addFootnoteNumAuto(p: var RstParser, label: string) =
    ## add auto-numbered footnote.
    ## Empty label [#] means it'll be resolved by the occurrence.
    if label.len > 0:  # auto-numbered with label [#label]
      for fnote in p.s.footnotes:
        if fnote.label == label:
          rstMessage(p, mwRedefinitionOfLabel, label)
          return
      p.s.footnotes.add((fnAutoNumberLabel, -1, -1, -1, label))
    else:  # simple auto-numbered [#]
      p.s.lineFootnoteNum.add lineInfo(p)
      # the occurrence index is 1-based: the length after the `add` above
      p.s.footnotes.add((fnAutoNumber, -1, p.s.lineFootnoteNum.len, -1, label))
  proc addFootnoteSymAuto(p: var RstParser) =
    ## Adds an auto-symbol footnote; its occurrence index is the 1-based
    ## position of the current location in `lineFootnoteSym`.
    p.s.lineFootnoteSym.add(lineInfo(p))
    let order = p.s.lineFootnoteSym.len
    p.s.footnotes.add((fnAutoSymbol, -1, -1, order, ""))
  proc orderFootnotes(s: PRstSharedState) =
    ## numerate auto-numbered footnotes taking into account that all
    ## manually numbered ones always have preference.
    ## Save the result back to `s.footnotes`.

    # Report an error if found any mismatch in number of automatic footnotes
    proc listFootnotes(locations: seq[TLineInfo]): string =
      ## Renders `locations` as "<count> (lines <l1>, <l2>, ...)".
      var lines: seq[string]
      for info in locations:
        var entry = ""
        if s.filenames.len > 1:
          entry = getFilename(s.filenames, info.fileIndex) & ":"
        # else: no need to add file name here if there is only 1
        entry.add $info.line
        lines.add entry
      result = $lines.len & " (lines " & join(lines, ", ") & ")"

    if s.lineFootnoteNum.len != s.lineFootnoteNumRef.len:
      let counts = "$1 != $2" % [listFootnotes(s.lineFootnoteNum),
                                 listFootnotes(s.lineFootnoteNumRef)]
      rstMessage(s, meFootnoteMismatch,
                 counts & " for auto-numbered footnotes")
    if s.lineFootnoteSym.len != s.lineFootnoteSymRef.len:
      let counts = "$1 != $2" % [listFootnotes(s.lineFootnoteSym),
                                 listFootnotes(s.lineFootnoteSymRef)]
      rstMessage(s, meFootnoteMismatch,
                 counts & " for auto-symbol footnotes")

    var ordered: seq[FootnoteSubst]
    var manuallyN, autoN, autoSymbol: seq[FootnoteSubst]
    for fs in s.footnotes:
      if fs.kind == fnManualNumber: manuallyN.add fs
      elif fs.kind in {fnAutoNumber, fnAutoNumberLabel}: autoN.add fs
      else: autoSymbol.add fs

    if autoN.len == 0:
      ordered = manuallyN
    else:
      # fill gaps between manually numbered footnotes in ascending order
      manuallyN.sort()  # all kinds are equal here, so this orders by number
      var lst = initSinglyLinkedList[FootnoteSubst]()
      for elem in manuallyN: lst.append(elem)
      var firstAuto = 0
      if lst.head == nil or lst.head.value.number != 1:
        # no manual footnote [1], start numeration from 1 for auto-numbered
        lst.prepend((autoN[0].kind, 1, autoN[0].autoNumIdx, -1, autoN[0].label))
        firstAuto = 1
      var curNode = lst.head
      var nextNode: SinglyLinkedNode[FootnoteSubst]
      # go simultaneously through `autoN` and `lst` looking for gaps
      for (kind, x, autoNumIdx, y, label) in autoN[firstAuto .. ^1]:
        while true:
          nextNode = curNode.next
          if nextNode == nil:
            break
          if nextNode.value.number - curNode.value.number > 1:
            # gap found, insert new node `n` between curNode and nextNode:
            var n = newSinglyLinkedNode((kind, curNode.value.number + 1,
                                         autoNumIdx, -1, label))
            curNode.next = n
            n.next = nextNode
            curNode = n
            break
          curNode = nextNode
        if nextNode == nil:  # no gap found, just append
          lst.append((kind, curNode.value.number + 1, autoNumIdx, -1, label))
          curNode = lst.tail
      ordered = lst.toSeq

    # we use ASCII symbols instead of those recommended in RST specification:
    const footnoteAutoSymbols = ["*", "^", "+", "=", "~", "$", "@", "%", "&"]
    for fs in autoSymbol:
      # assignment order: *, **, ***, ^, ^^, ^^^, ... &&&, ****, *****, ...
      let i = fs.autoSymIdx - 1
      let symbolNum = (i div 3) mod footnoteAutoSymbols.len
      let nSymbols = (1 + i mod 3) + 3 * (i div (3 * footnoteAutoSymbols.len))
      let label = footnoteAutoSymbols[symbolNum].repeat(nSymbols)
      ordered.add((fs.kind, -1, -1, fs.autoSymIdx, label))

    s.footnotes = ordered
  proc getFootnoteNum(s: PRstSharedState, label: string): int =
    ## get number from label. Must be called after `orderFootnotes`.
    ## Returns -1 when no footnote has this label.
    for fnote in s.footnotes:
      if fnote.label == label:
        return fnote.number
    result = -1

  proc getFootnoteNum(s: PRstSharedState, order: int): int =
    ## get number from occurrence. Must be called after `orderFootnotes`.
    ## Returns -1 when no footnote has this occurrence index.
    for fnote in s.footnotes:
      if fnote.autoNumIdx == order:
        return fnote.number
    result = -1

  proc getAutoSymbol(s: PRstSharedState, order: int): string =
    ## get symbol from occurrence of auto-symbol footnote.
    ## Returns "???" when no footnote has this occurrence index.
    for fnote in s.footnotes:
      if fnote.autoSymIdx == order:
        return fnote.label
    result = "???"
  proc newRstNodeA(p: var RstParser, kind: RstNodeKind): PRstNode =
    ## create node and consume the current anchor
    ##
    ## When anchors are pending, the node gets the `anchor` of the first one
    ## and all pending aliases are registered for it via `addAnchorRst`.
    result = newRstNode(kind)
    if p.curAnchors.len == 0:
      return
    result.anchor = p.curAnchors[0].anchor
    addAnchorRst(p, "", result, manualDirectiveAnchor)
  template newLeaf(s: string): PRstNode =
    ## Shortcut for `newRstLeaf(s)`.
    newRstLeaf(s)

  proc newLeaf(p: var RstParser): PRstNode =
    ## Creates a leaf holding the symbol of the current token.
    newRstLeaf(currentTok(p).symbol)
  proc validRefnamePunct(x: string): bool =
    ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names
    ##
    ## True when `x` is exactly one of the punctuation characters that may
    ## appear inside a reference name: `-`, `_`, `.`, `:`, `+`.
    const refnamePunct = {'-', '_', '.', ':', '+'}
    result = x.len == 1 and x[0] in refnamePunct
  func getRefnameIdx(p: RstParser, startIdx: int): int =
    ## Gets last token index of a refname ("word" in RST terminology):
    ##
    ##   reference names are single words consisting of alphanumerics plus
    ##   isolated (no two adjacent) internal hyphens, underscores, periods,
    ##   colons and plus signs; no whitespace or other characters are allowed.
    ##
    ## Refnames are used for:
    ## - reference names
    ## - role names
    ## - directive names
    ## - footnote labels
    ##
    ## If the token at `startIdx` is not a word the result is `startIdx - 1`.
    # TODO: use this func in all other relevant places
    result = startIdx - 1
    if p.tok[startIdx].kind != tkWord:
      return
    var j = startIdx + 1
    # consume every following "<punctuation><word>" pair
    while p.tok[j].kind == tkPunct and validRefnamePunct(p.tok[j].symbol) and
        p.tok[j+1].kind == tkWord:
      inc j, 2
    result = j - 1
  func getRefname(p: RstParser, startIdx: int): (string, int) =
    ## Returns the refname that starts at token `startIdx` together with the
    ## index of its last token (see `getRefnameIdx`).
    let lastIdx = getRefnameIdx(p, startIdx)
    var name = ""
    for j in startIdx .. lastIdx:
      name.add p.tok[j].symbol
    result = (name, lastIdx)
  proc getReferenceName(p: var RstParser, endStr: string): PRstNode =
    ## Collects tokens into an `rnInner` node up to the punctuation token
    ## `endStr`, which is consumed but not included.
    ##
    ## Words, whitespace, "other" tokens and any different punctuation are
    ## added as leaves. Any other token kind stops the collection with a
    ## `meExpected` message.
    result = newRstNode(rnInner)
    while true:
      let kind = currentTok(p).kind
      if kind == tkPunct and currentTok(p).symbol == endStr:
        inc p.idx
        break
      if kind notin {tkWord, tkOther, tkWhite, tkPunct}:
        rstMessage(p, meExpected, endStr)
        break
      result.add(newLeaf(p))
      inc p.idx
  proc untilEol(p: var RstParser): PRstNode =
    ## Collects all tokens up to (not including) the next `tkIndent` or
    ## `tkEof` as leaves of a new `rnInner` node.
    result = newRstNode(rnInner)
    while true:
      if currentTok(p).kind in {tkIndent, tkEof}:
        break
      result.add(newLeaf(p))
      inc p.idx
  proc expect(p: var RstParser, tok: string) =
    ## Skips the current token if its symbol is `tok`; otherwise reports
    ## `meExpected` and leaves the position unchanged.
    if currentTok(p).symbol != tok:
      rstMessage(p, meExpected, tok)
    else:
      inc p.idx
  proc inlineMarkdownEnd(p: RstParser): bool =
    ## Markdown rule for closing inline markup: the previous token must not
    ## be whitespace or a line break.
    ##
    ## (For a special case of ` we don't allow spaces surrounding it
    ## unlike original Markdown because this behavior confusing/useless)
    result = prevTok(p).kind notin {tkIndent, tkWhite}

  proc inlineRstEnd(p: RstParser): bool =
    ## RST rules for closing inline markup.
    # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
    # Rule 2:
    if prevTok(p).kind in {tkIndent, tkWhite}:
      return false
    # Rule 7:
    const allowedAfter = {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\',
                          ':', '.', ',', ';', '!', '?', '_'}
    result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or
        nextTok(p).symbol[0] in allowedAfter
  proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
    ## Checks whether the current token closes inline `markup`.
    ##
    ## With `exact` the token symbol must equal `markup`, otherwise it only
    ## has to end with it. The surroundings are then checked by the RST
    ## rules, or by the Markdown rule when Markdown is preferred and
    ## `markup` is not `_`/`__`.
    var matched =
      if exact: currentTok(p).symbol == markup
      else: currentTok(p).symbol.endsWith markup
    if not matched and markup == "``":
      # check that escaping may have splitted `` to 2 tokens ` and `
      matched = currentTok(p).symbol == "`" and prevTok(p).symbol == "`"
    if not matched:
      return false
    # surroundings check
    if markup notin ["_", "__"] and roPreferMarkdown in p.s.options:
      result = inlineMarkdownEnd(p)
    else:
      result = inlineRstEnd(p)
  proc rstRuleSurround(p: RstParser): bool =
    ## Returns false when the current token is enclosed in a matching pair
    ## of quotes or brackets, i.e. the previous token starts with an opening
    ## character and the next token starts with its counterpart.
    # Rules 4 & 5:
    if p.idx <= 0:
      return true
    let opening = prevTok(p).symbol[0]
    let closing =
      case opening
      of '\'', '\"': opening
      of '(': ')'
      of '[': ']'
      of '{': '}'
      of '<': '>'
      else: '\0'
    result = closing == '\0' or nextTok(p).symbol[0] != closing
  proc inlineMarkdownStart(p: RstParser): bool =
    ## Markdown rule for opening inline markup: the next token must not be
    ## whitespace, a line break or the end of input.
    # the rst surround rule is really nice, let us use it in Markdown mode too.
    result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof} and
        rstRuleSurround(p)

  proc inlineRstStart(p: RstParser): bool =
    ## rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
    const allowedBefore = {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'}
    # Rule 6
    let precededOk = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or
        prevTok(p).symbol[0] in allowedBefore
    if not precededOk:
      return false
    # Rule 1:
    if nextTok(p).kind in {tkIndent, tkWhite, tkEof}:
      return false
    result = rstRuleSurround(p)
  proc isInlineMarkupStart(p: RstParser, markup: string): bool =
    ## Checks whether inline `markup` opens at the current token.
    ##
    ## The surroundings are checked by the RST rules for `_`, `__`, `[` and
    ## `|` (and for everything when Markdown is not preferred); otherwise
    ## the Markdown rule is used.
    let matched =
      if markup == "_`":  # _` is a 2 token case
        currentTok(p).symbol == "_" and nextTok(p).symbol == "`"
      else:
        currentTok(p).symbol == markup
    if not matched:
      return false
    # surroundings check
    # Note: we require space/punctuation even before [markdown link](...)
    if markup notin ["_", "__", "[", "|"] and roPreferMarkdown in p.s.options:
      result = inlineMarkdownStart(p)
    else:
      result = inlineRstStart(p)
  proc match(p: RstParser, start: int, expr: string): bool =
    ## Checks whether the tokens beginning at index `start` fit the pattern
    ## `expr`. Pattern elements are:
    ##
    ## ============  ======================================================
    ## special char  exact match (a run of N equal chars matches one
    ##               punctuation/adornment token made of N such chars)
    ## 'w'           refname, see `getRefnameIdx` (may span several tokens)
    ## ' '           tkWhite
    ## 'a'           tkAdornment
    ## 'i'           tkIndent
    ## 'I'           tkIndent or tkEof
    ## 'p'           tkPunct
    ## 'o'           tkOther
    ## 'T'           always true
    ## 'E'           whitespace, indent or eof
    ## 'e'           single letter, number or '#' (for enumeration lists)
    ## 'x'           single letter or '#' (for enumeration lists)
    ## 'n'           number or '#' (for enumeration lists)
    ## ============  ======================================================
    var
      i = 0       # position inside `expr`
      j = start   # position inside `p.tok`
    let last = expr.len - 1
    while i <= last:
      let want = expr[i]
      case want
      of 'w':
        let lastIdx = getRefnameIdx(p, j)
        result = lastIdx >= j
        if result: j = lastIdx
      of ' ': result = p.tok[j].kind == tkWhite
      of 'i': result = p.tok[j].kind == tkIndent
      of 'I': result = p.tok[j].kind in {tkIndent, tkEof}
      of 'p': result = p.tok[j].kind == tkPunct
      of 'a': result = p.tok[j].kind == tkAdornment
      of 'o': result = p.tok[j].kind == tkOther
      of 'T': result = true
      of 'E': result = p.tok[j].kind in {tkEof, tkWhite, tkIndent}
      of 'e', 'x', 'n':
        result = p.tok[j].kind == tkWord or p.tok[j].symbol == "#"
        if result:
          case p.tok[j].symbol[0]
          of '#': result = true
          of 'a'..'z', 'A'..'Z':
            result = want in {'e', 'x'} and p.tok[j].symbol.len == 1
          of '0'..'9':
            result = want in {'e', 'n'} and
                       allCharsInSet(p.tok[j].symbol, {'0'..'9'})
          else: result = false
      else:
        # a run of the same special char must equal one whole token
        var length = 0
        while i <= last and expr[i] == want:
          inc i
          inc length
        dec i  # step back to the last char of the run
        result = p.tok[j].kind in {tkPunct, tkAdornment} and
            p.tok[j].symbol.len == length and p.tok[j].symbol[0] == want
      if not result: return
      inc j
      inc i
    result = true
  proc safeProtocol*(linkStr: var string): string =
    ## Returns the protocol of `linkStr` (the identifier in front of the
    ## first `:`), or `""` when the link has no such prefix.
    ##
    ## Only `http`, `https` and `ftp` (compared case-insensitively) are
    ## treated as safe. For any other protocol `linkStr` is cleared, while the
    ## offending protocol is still returned so the caller can report it.
    result = ""
    if scanf(linkStr, "$w:", result):
      # if it has a protocol at all, ensure that it's not 'javascript:' or worse:
      var isSafe = false
      for allowed in ["http", "https", "ftp"]:
        if cmpIgnoreCase(result, allowed) == 0:
          isSafe = true
          break
      if not isSafe:
        linkStr = ""
    else:
      # `scanf` may already have stored a leading identifier in `result`
      # before failing on the ':'; a link without a protocol reports none.
      result = ""
  proc fixupEmbeddedRef(p: var RstParser, n, a, b: PRstNode): bool =
    ## Splits the sons of `n` at the last son whose text is "<" (the final
    ## son of `n` is never inspected). The sons in front of it are appended
    ## to `a`; the text of the sons between "<" and the final son is joined
    ## and appended to `b` as a single leaf.
    ##
    ## Returns `true` if the link belongs to an allowed protocol; otherwise
    ## `mwBrokenLink` is reported. An empty link text yields `false` silently.
    var ltIdx = -1
    for i in countdown(n.len - 2, 0):
      if n.sons[i].text == "<":
        ltIdx = i
        break
    # When the son right before "<" starts with a blank, drop it as well.
    let skip =
      if ltIdx > 0 and n.sons[ltIdx - 1].text[0] == ' ': 2
      else: 1
    for i in 0 .. ltIdx - skip:
      a.add(n.sons[i])
    var target = ""
    for i in ltIdx + 1 .. n.len - 2:
      target.add(addNodes(n.sons[i]))
    if target != "":
      let protocol = safeProtocol(target)  # clears `target` if it is unsafe
      if target != "":
        result = true
      else:
        result = false
        rstMessage(p, mwBrokenLink, protocol,
                   p.tok[p.idx-3].line, p.tok[p.idx-3].col)
    b.add newLeaf(target)
  proc whichRole(p: RstParser, sym: string): RstNodeKind =
    ## Maps the role name `sym` to a node kind through `whichRoleAux` and
    ## reports `mwUnsupportedLanguage` when the outcome is `rnUnknownRole`.
    result = whichRoleAux(sym)
    case result
    of rnUnknownRole: rstMessage(p, mwUnsupportedLanguage, sym)
    else: discard
  proc toInlineCode(n: PRstNode, language: string): PRstNode =
    ## Builds an `rnInlineCode` node out of `n`: son 0 is an `rnDirArg` with
    ## the language name ("cpp" and "csharp" are spelled "c++" and "c#"),
    ## son 1 is nil and son 2 is an `rnLiteralBlock` whose only leaf holds the
    ## joined text of the sons of `n`, which all must be leaves.
    let langName =
      case language
      of "cpp": "c++"
      of "csharp": "c#"
      else: language
    let langArg = newRstNode(rnDirArg)
    langArg.add newLeaf(langName)
    var code = ""
    for son in n.sons:
      assert son.kind == rnLeaf
      code.add son.text
    let literal = newRstNode(rnLiteralBlock)
    literal.add newLeaf(code)
    result = newRstNode(rnInlineCode, info=n.info)
    result.add langArg
    result.add PRstNode(nil)
    result.add literal
  proc toOtherRole(n: PRstNode, kind: RstNodeKind, roleName: string): PRstNode =
    ## Returns a node of `kind` with two sons: an `rnInner` node holding the
    ## sons of `n`, and a leaf with `roleName`.
    let content = newRstNode(rnInner, n.sons)
    result = newRstNode(kind, @[content, newLeaf(roleName)])
  proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode =
    ## Finalizes node `n` that was tentatively determined as interpreted text,
    ## depending on what follows the closing backtick:
    ##
    ## * `_` or `__`: a reference; with an embedded `<target>` it becomes a
    ##   hyperlink (or plain `rnInner` text when the target is rejected by
    ##   `fixupEmbeddedRef`), otherwise an `rnRstRef`;
    ## * `:role:`: the node kind is chosen by `whichRole`;
    ## * anything else: the current default role of the parser state is used.
    proc finalizeInterpreted(node: PRstNode, newKind: RstNodeKind,
                             newSons: seq[PRstNode], roleName: string):
                            PRstNode {.nimcall.} =
      # fixes interpreted text (`x` or `y`:role:) to proper internal AST format
      case newKind
      of rnUnknownRole, rnCodeFragment:
        result = node.toOtherRole(newKind, roleName)
      of rnInlineCode:
        result = node.toInlineCode(language=roleName)
      else:
        result = newRstNode(newKind, newSons)

    let isRefEnd = isInlineMarkupEnd(p, "_", exact=true) or
                   isInlineMarkupEnd(p, "__", exact=true)
    if isRefEnd:
      inc p.idx
      let hasEmbeddedTarget =
        p.tok[p.idx-2].symbol == "`" and p.tok[p.idx-3].symbol == ">"
      if not hasEmbeddedTarget:
        # some link that will be resolved in `resolveSubs`
        return newRstNode(rnRstRef, sons=n.sons, info=n.info)
      let
        title = newRstNode(rnInner)
        target = newRstNode(rnInner)
      if not fixupEmbeddedRef(p, n, title, target):
        # include as plain text, not a link
        result = newRstNode(rnInner, n.sons)
      elif title.len == 0:
        # e.g. `<a_named_relative_link>`_
        result = newRstNode(rnStandaloneHyperlink, @[target])
      else:
        # e.g. `link title <http://site>`_
        setRef(p, rstnodeToRefname(title), target, implicitHyperlinkAlias)
        result = newRstNode(rnHyperlink, @[title, target])
    elif match(p, p.idx, ":w:"):
      # a role:
      let (roleName, lastIdx) = getRefname(p, p.idx+1)
      let roleKind = whichRole(p, roleName)
      result = n.finalizeInterpreted(roleKind, n.sons, roleName)
      p.idx = lastIdx + 2
    else:
      result = n.finalizeInterpreted(p.s.currRoleKind, n.sons, p.s.currRole)
  proc matchVerbatim(p: RstParser, start: int, expr: string): int =
    ## Matches the literal text `expr` against the symbols of consecutive
    ## tokens beginning at index `start`. Returns the index of the token that
    ## follows the match, or 0 when `expr` could not be matched completely.
    var
      tokIdx = start
      matched = 0    # number of chars of `expr` covered so far
    while matched < expr.len and tokIdx < p.tok.len:
      let sym = p.tok[tokIdx].symbol
      if not continuesWith(expr, sym, matched): break
      inc matched, sym.len
      inc tokIdx
    result = if matched < expr.len: 0 else: tokIdx
  proc parseSmiley(p: var RstParser): PRstNode =
    ## Tries each entry of `Smilies` at the current position. On the first
    ## match `p.idx` is moved behind it and an `rnSmiley` node carrying the
    ## value paired with the matched key is returned; otherwise the result is
    ## nil and `p.idx` stays where it was.
    if currentTok(p).symbol[0] in SmileyStartChars:
      for key, val in items(Smilies):
        let endIdx = matchVerbatim(p, p.idx, key)
        if endIdx > 0:
          p.idx = endIdx
          result = newRstNode(rnSmiley)
          result.text = val
          break
  proc isUrl(p: RstParser, i: int): bool =
    ## True when the tokens at `i` read `<protocol>` `:` `//` `<word>`, with
    ## the protocol being one of http, https, ftp, telnet or file.
    const protocols = ["http", "https", "ftp", "telnet", "file"]
    result = false
    if p.tok[i+1].symbol == ":" and p.tok[i+2].symbol == "//":
      result = p.tok[i+3].kind == tkWord and p.tok[i].symbol in protocols
  proc checkParen(token: Token, parensStack: var seq[char]): bool {.inline.} =
    ## Returns `true` iff `token` is a closing parenthesis for some
    ## previous opening parenthesis saved in `parensStack`.
    ## Opening parentheses are pushed onto `parensStack` as a side effect.
    ##
    ## This follows the Markdown balanced parentheses rule
    ## (https://spec.commonmark.org/0.29/#link-destination)
    ## to allow links like
    ## https://en.wikipedia.org/wiki/APL_(programming_language),
    ## and it is used for RST also.
    result = false
    if token.kind != tkPunct: return
    let c = token.symbol[0]
    case c
    of '(', '[', '{':  # push
      parensStack.add c
    of ')', ']', '}':  # try pop
      let opener =
        case c
        of ')': '('
        of ']': '['
        else: '{'
      # a case like ([) inside a link is allowed and [ is also `pop`ed:
      for i in countdown(parensStack.high, 0):
        if parensStack[i] == opener:
          parensStack.setLen i
          result = true
          break
    else:
      discard
  proc parseUrl(p: var RstParser): PRstNode =
    ## Parses a standalone hyperlink starting at the current token, see
    ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#standalone-hyperlinks
    ##
    ## The URL spans the run of word/punctuation/other tokens; trailing
    ## punctuation other than "/" is cut off, but never further back than the
    ## last parenthesis that closed a balanced pair.
    var
      endIdx = p.idx
      lastClosingIdx = p.idx - 1  # for balanced parens rule
      openParens: seq[char]
    while p.tok[endIdx].kind in {tkWord, tkPunct, tkOther}:
      if checkParen(p.tok[endIdx], openParens):
        lastClosingIdx = endIdx
      inc endIdx
    dec endIdx
    # standalone URL can not end with punctuation in RST
    while endIdx > lastClosingIdx and p.tok[endIdx].kind == tkPunct and
        p.tok[endIdx].symbol != "/":
      dec endIdx
    var url = ""
    for i in p.idx .. endIdx:
      url.add p.tok[i].symbol
    result = newRstNode(rnStandaloneHyperlink)
    result.add url
    p.idx = endIdx + 1
  proc parseWordOrRef(p: var RstParser, father: PRstNode) =
    ## Parses a normal word, which may also turn out to be the beginning of a
    ## URL or of a reference (`name_`), and appends the node to `father`.
    if nextTok(p).kind != tkPunct:  # <- main path, a normal word
      father.add newLeaf(p)
      inc p.idx
      return
    if isUrl(p, p.idx):  # URL http://something
      father.add parseUrl(p)
      return
    # check for reference (probably, long one like some.ref.with.dots_ )
    let startIdx = p.idx
    var foundRef = false
    inc p.idx
    while currentTok(p).kind in {tkWord, tkPunct}:
      if currentTok(p).kind == tkPunct:
        if isInlineMarkupEnd(p, "_", exact=true):
          foundRef = true
          break
        if not validRefnamePunct(currentTok(p).symbol):
          break
      inc p.idx
    if foundRef:
      let reference = newRstNode(rnRstRef, info=lineInfo(p, startIdx))
      for i in startIdx ..< p.idx:
        reference.add newLeaf(p.tok[i].symbol)
      father.add reference
      inc p.idx  # skip final _
    else:  # 1 normal word
      father.add newLeaf(p.tok[startIdx].symbol)
      p.idx = startIdx + 1
  proc parseBackslash(p: var RstParser, father: PRstNode) =
    ## Handles a punctuation token. A backslash is dropped and the token
    ## after it is added to `father` as a leaf, unless that token is
    ## whitespace, which is swallowed. Any other punctuation is added as is.
    assert(currentTok(p).kind == tkPunct)
    if currentTok(p).symbol != "\\":
      father.add(newLeaf(p))
      inc p.idx
      return
    # XXX: Unicode?
    inc p.idx
    if currentTok(p).kind != tkWhite: father.add(newLeaf(p))
    if currentTok(p).kind != tkEof: inc p.idx
  proc parseUntil(p: var RstParser, father: PRstNode, postfix: string,
                  interpretBackslash: bool) =
    ## Skips the current token (the opening markup) and appends the following
    ## tokens to `father` as leaves until the closing markup `postfix` is
    ## met, which is consumed too. Line breaks and whitespace become a single
    ## " " leaf each.
    ##
    ## `interpretBackslash` selects whether punctuation goes through
    ## `parseBackslash`; for the postfix "`" a dedicated escaping scheme is
    ## used instead (see below).
    ##
    ## `meExpected` is reported at the position of the opening markup when a
    ## blank line or the end of input is hit first. In both cases the loop is
    ## left, so a message handler that does not raise cannot make it spin.
    let
      line = currentTok(p).line
      col = currentTok(p).col
    inc p.idx
    while true:
      case currentTok(p).kind
      of tkPunct:
        if isInlineMarkupEnd(p, postfix, exact=false):
          let l = currentTok(p).symbol.len
          if l > postfix.len:
            # handle cases like *emphasis with stars****. (It's valid RST!)
            father.add newLeaf(currentTok(p).symbol[0 ..< l - postfix.len])
          elif postfix == "``" and currentTok(p).symbol == "`" and
              prevTok(p).symbol == "`":
            # handle cases like ``literal\`` - delete ` already added after \
            father.sons.setLen(father.sons.len - 1)
          inc p.idx
          break
        elif postfix == "`":
          # Inside `...`: only \\ and \` are escapes, a lone \ stays literal.
          if currentTok(p).symbol == "\\":
            if nextTok(p).symbol == "\\":
              father.add newLeaf("\\")
              father.add newLeaf("\\")
              inc p.idx, 2
            elif nextTok(p).symbol == "`":  # escape `
              father.add newLeaf("`")
              inc p.idx, 2
            else:
              father.add newLeaf("\\")
              inc p.idx
          else:
            father.add(newLeaf(p))
            inc p.idx
        elif interpretBackslash:
          parseBackslash(p, father)
        else:
          father.add(newLeaf(p))
          inc p.idx
      of tkAdornment, tkWord, tkOther:
        father.add(newLeaf(p))
        inc p.idx
      of tkIndent:
        father.add newLeaf(" ")
        inc p.idx
        if currentTok(p).kind == tkIndent:
          # two line breaks in a row: a blank line ends the paragraph
          rstMessage(p, meExpected, postfix, line, col)
          break
      of tkWhite:
        father.add newLeaf(" ")
        inc p.idx
      else:
        # End of input: nothing can advance `p.idx` any more, so the loop
        # must stop here even if the message handler returns normally.
        rstMessage(p, meExpected, postfix, line, col)
        break
  proc parseMarkdownCodeblockFields(p: var RstParser): PRstNode =
    ## Parses additional (after language string) code block parameters
    ## in a format *suggested* in the `CommonMark Spec`_ with handling of `"`.
    ##
    ## Returns nil when the line ends right away, otherwise an `rnFieldList`
    ## with one `rnField` (name and body) per `name`, `name=value` or
    ## `name="quoted value"` item. Parsing stops at the end of the line or at
    ## the end of input.
    if currentTok(p).kind == tkIndent:
      result = nil
    else:
      result = newRstNode(rnFieldList)
    # tkEof must end the loop too: nothing below advances past it, so
    # without this check an info string ending the input would loop forever.
    while currentTok(p).kind notin {tkIndent, tkEof}:
      if currentTok(p).kind == tkWhite:
        inc p.idx
      else:
        let field = newRstNode(rnField)
        var fieldName = ""
        while currentTok(p).kind notin {tkWhite, tkIndent, tkEof} and
            currentTok(p).symbol != "=":
          fieldName.add currentTok(p).symbol
          inc p.idx
        field.add(newRstNode(rnFieldName, @[newLeaf(fieldName)]))
        if currentTok(p).kind == tkWhite: inc p.idx
        let fieldBody = newRstNode(rnFieldBody)
        if currentTok(p).symbol == "=":
          inc p.idx
          if currentTok(p).kind == tkWhite: inc p.idx
          var fieldValue = ""
          if currentTok(p).symbol == "\"":
            # Quoted value: keep everything, quotes included, up to the
            # closing quote.
            while true:
              fieldValue.add currentTok(p).symbol
              inc p.idx
              if currentTok(p).kind == tkEof:
                rstMessage(p, meExpected, "\"")
                # do not step beyond the last token if the handler returns
                break
              elif currentTok(p).symbol == "\"":
                fieldValue.add "\""
                inc p.idx
                break
          else:
            while currentTok(p).kind notin {tkWhite, tkIndent, tkEof}:
              fieldValue.add currentTok(p).symbol
              inc p.idx
          fieldBody.add newLeaf(fieldValue)
        field.add(fieldBody)
        result.add(field)
  proc mayLoadFile(p: RstParser, result: var PRstNode) =
    ## If the field list of `result` has a non-empty `file` field, replaces
    ## son 2 of `result` with an `rnLiteralBlock` holding the content of that
    ## file. Surrounding whitespace and `"` are stripped from the file name.
    ##
    ## Loading is refused with `meSandboxedDirective` unless
    ## `roSandboxDisabled` is set, and `meCannotOpenFile` is reported when
    ## `findRelativeFile` cannot locate the file. In both cases nothing is
    ## read, even when the message handler returns instead of raising.
    let filename = strip(getFieldValue(result, "file"),
                         chars = Whitespace + {'"'})
    if filename == "": return
    if roSandboxDisabled notin p.s.options:
      let tok = p.tok[p.idx-2]
      rstMessage(p, meSandboxedDirective, "file", tok.line, tok.col)
      return  # never touch the file system while sandboxed
    let path = p.findRelativeFile(filename)
    if path == "":
      rstMessage(p, meCannotOpenFile, filename)
      return  # there is nothing to read
    let n = newRstNode(rnLiteralBlock)
    n.add newLeaf(readFile(path))
    result.sons[2] = n
  proc defaultCodeLangNim(p: RstParser, result: var PRstNode) =
    ## Appends the field `default-language` with value `Nim` to the field
    ## list in son 1 of `result`; the list is created first when it is nil.
    if result.sons[1].isNil: result.sons[1] = newRstNode(rnFieldList)
    assert result.sons[1].kind == rnFieldList
    let
      field = newRstNode(rnField, info=lineInfo(p))
      nameNode = newRstNode(rnFieldName)
      bodyNode = newRstNode(rnFieldBody)
    nameNode.add newLeaf("default-language")
    bodyNode.add newLeaf("Nim")
    field.add(nameNode)
    field.add(bodyNode)
    result.sons[1].add(field)
  proc parseMarkdownCodeblock(p: var RstParser): PRstNode =
    ## Parses a fenced Markdown code block whose opening fence is the current
    ## token. The resulting `rnCodeBlock` has three sons: the language
    ## argument (or nil), the extra fields (or nil) and the literal block
    ## with the code.
    ##
    ## The block ends at a token that follows the current one, begins with
    ## the fence character and is at least as long as the opening fence;
    ## reaching the end of input first reports `meMissingClosing`.
    result = newRstNodeA(p, rnCodeBlock)
    result.sons.setLen(3)
    let
      startLine = curLine(p)
      fenceCol = currentTok(p).col
      fence = currentTok(p).symbol  # usually just ```
    inc p.idx
    result.info = lineInfo(p)
    var langArg: PRstNode = nil
    if currentTok(p).kind == tkWord:
      langArg = newRstNode(rnDirArg)
      langArg.add(newLeaf(p))
      inc p.idx
      result.sons[1] = parseMarkdownCodeblockFields(p)
      mayLoadFile(p, result)
    let code = newLeaf("")
    var atFirstLine = true
    while true:
      if currentTok(p).kind == tkEof:
        rstMessage(p, meMissingClosing,
                   "$1 (started at line $2)" % [fence, $startLine])
        break
      elif nextTok(p).kind in {tkPunct, tkAdornment} and
           nextTok(p).symbol[0] == fence[0] and
           nextTok(p).symbol.len >= fence.len:
        inc p.idx, 2
        break
      elif currentTok(p).kind == tkIndent:
        # a line break; the one right after the opening line is not kept
        if not atFirstLine:
          code.text.add "\n"
        let lineIndent = currentTok(p).ival
        if lineIndent > fenceCol:
          code.text.add " ".repeat(lineIndent - fenceCol)
        elif lineIndent < fenceCol:
          rstMessage(p, mwRstStyle,
                     "unexpected de-indentation in Markdown code block")
        inc p.idx
      else:
        code.text.add(currentTok(p).symbol)
        inc p.idx
      atFirstLine = false
    result.sons[0] = langArg
    if result.sons[2] == nil:
      # no file content was loaded by `mayLoadFile`: use the inline code
      let literal = newRstNode(rnLiteralBlock)
      literal.add(code)
      result.sons[2] = literal
    if result.sons[0].isNil and roNimFile in p.s.options:
      defaultCodeLangNim(p, result)
  proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool =
    ## Parses a Markdown link that begins at the current `[` token. On
    ## success the node is appended to `father`, `p.idx` is moved behind the
    ## link and `true` is returned; on failure `p.idx` is left untouched.
    ##
    ## `[desc](url)` yields an `rnHyperlink`, provided the URL passes
    ## `safeProtocol` (otherwise `mwBrokenLink` is reported). With
    ## `roPreferMarkdown` the forms `[desc][target]`, `[desc][]` and `[desc]`
    ## yield an `rnPandocRef` whose second son (target) is in tokenized
    ## format (`rnInner` with leaves).
    let desc = newRstNode(rnInner)
    var
      pos = p.idx
      brackets: seq[char]

    template collectUntil(endToken, dest) =
      # Gathers the tokens after `pos` into `dest` up to an `endToken` that
      # does not close a bracket opened inside; leaves `pos` behind it.
      # End of input or a blank line makes the enclosing proc return false.
      brackets.setLen 0
      inc pos  # skip begin token
      while true:
        if p.tok[pos].kind == tkEof: return false
        if p.tok[pos].kind == tkIndent and p.tok[pos+1].kind == tkIndent:
          return false
        let closesInner = checkParen(p.tok[pos], brackets)
        if p.tok[pos].symbol == endToken and not closesInner:
          break
        let piece = if p.tok[pos].kind == tkIndent: " " else: p.tok[pos].symbol
        when dest is string: dest.add piece
        else: dest.add newLeaf(piece)
        inc pos
      inc pos  # skip end token

    collectUntil("]", desc)
    if p.tok[pos].symbol == "(":
      var link = ""
      let linkIdx = pos + 1
      collectUntil(")", link)
      # only commit if we detected no syntax error:
      let protocol = safeProtocol(link)
      result = link != ""
      if result:
        let hyperlink = newRstNode(rnHyperlink)
        hyperlink.add newLeaf(desc.addNodes)
        hyperlink.add link
        father.add hyperlink
        p.idx = pos
      else:
        rstMessage(p, mwBrokenLink, protocol,
                   p.tok[linkIdx].line, p.tok[linkIdx].col)
    elif roPreferMarkdown in p.s.options:
      # Use Pandoc's implicit_header_references extension
      let pandocRef = newRstNode(rnPandocRef)
      var infoIdx = p.idx + 1
      if p.tok[pos].symbol == "[":
        let target = newRstNode(rnInner)
        let targetIdx = pos + 1
        collectUntil("]", target)
        pandocRef.add desc
        if target.len != 0:  # [description][target]
          pandocRef.add target
          infoIdx = targetIdx
        else:                # [description=target][]
          pandocRef.add desc
      else:                  # [description=target]
        pandocRef.add desc
        pandocRef.add desc   # target is the same as description
      pandocRef.info = lineInfo(p, infoIdx)
      father.add pandocRef
      p.idx = pos
      result = true
    else:
      result = false
  proc getRstFootnoteType(label: PRstNode): (FootnoteType, int) =
    ## Classifies an RST footnote/citation label:
    ##
    ## * `#` alone: `fnAutoNumber`; `#` followed by more: `fnAutoNumberLabel`
    ## * `*` alone: `fnAutoSymbol`
    ## * a single leaf that parses as an integer: `fnManualNumber` with it
    ## * anything else: `fnCitation`
    ##
    ## The second tuple field is -1 except for `fnManualNumber`.
    result = (fnCitation, -1)
    let startsWithLeaf = label.sons.len >= 1 and label.sons[0].kind == rnLeaf
    if not startsWithLeaf: return
    let first = label.sons[0].text
    if first == "#":
      if label.sons.len == 1:
        result = (fnAutoNumber, -1)
      else:
        result = (fnAutoNumberLabel, -1)
    elif label.len == 1:
      if first == "*":
        result = (fnAutoSymbol, -1)
      else:
        try:
          result = (fnManualNumber, parseInt(first))
        except ValueError:
          result = (fnCitation, -1)
  proc getMdFootnoteType(label: PRstNode): (FootnoteType, int) =
    ## Classifies a Markdown footnote label by its first son: text that
    ## parses as an integer gives `fnManualNumber` with that number, any
    ## other text gives `fnAutoNumberLabel` with -1.
    result = (fnAutoNumberLabel, -1)
    try:
      result = (fnManualNumber, parseInt(label.sons[0].text))
    except ValueError:
      discard  # not a number: keep the auto-numbered label default
  proc getFootnoteType(s: PRstSharedState, label: PRstNode): (FootnoteType, int) =
    ## Returns footnote/citation type and manual number (if present),
    ## using the Markdown or the RST rules depending on `isMd(s)`.
    result =
      if isMd(s): getMdFootnoteType(label)
      else: getRstFootnoteType(label)
  proc parseRstFootnoteName(p: var RstParser, reference: bool): PRstNode =
    ## parse footnote/citation label. Precondition: start at `[`.
    ## Label text should be valid ref. name symbol, otherwise nil is returned.
    ##
    ## With `reference` set, the closing `]` must be followed by `_`.
    ## On success the label tokens are returned as leaves of an `rnInner`
    ## node and `p.idx` is moved behind `]` (and `_`); on failure `p.idx` is
    ## not changed.
    ##
    ## `#` is only accepted as the first label character and `*` only as the
    ## sole label character (`[*]`).
    let first = p.idx + 1   # index of the first token of the label
    var i = first
    result = newRstNode(rnInner)
    while true:
      if p.tok[i].kind in {tkEof, tkIndent, tkWhite}:
        return nil
      if p.tok[i].kind == tkPunct:
        case p.tok[i].symbol
        of "]":
          if i > first and (not reference or
              (p.tok[i+1].kind == tkPunct and p.tok[i+1].symbol == "_")):
            inc i                # skip ]
            if reference: inc i  # skip _
            break  # to succeed, it's a footnote/citation indeed
          else:
            return nil
        of "#":
          if i != first:
            return nil
        of "*":
          # The previous test also required `kind != tkPunct`, which is
          # impossible inside this branch, so `*` was never rejected.
          if i != first or p.tok[i+1].symbol != "]":
            return nil
        else:
          if not validRefnamePunct(p.tok[i].symbol):
            return nil
      result.add newLeaf(p.tok[i].symbol)
      inc i
    p.idx = i
  proc isMdFootnoteName(p: RstParser, reference: bool): bool =
    ## Pandoc Markdown footnote extension: true when the current tokens are
    ## `[`, `^` and a word. `reference` is not consulted here.
    let start = p.idx
    result = false
    if p.tok[start].symbol == "[" and p.tok[start+1].symbol == "^":
      result = p.tok[start+2].kind == tkWord
  proc parseMdFootnoteName(p: var RstParser, reference: bool): PRstNode =
    ## Parses a Pandoc Markdown footnote label `[^name]`. The name tokens
    ## are returned as leaves of an `rnInner` node, or nil if this is no such
    ## label. For a reference `p.idx` is moved behind `]`; otherwise a `:`
    ## must follow and `p.idx` is moved behind `]:`. On failure `p.idx` is
    ## not changed.
    if not isMdFootnoteName(p, reference): return nil
    let
      label = newRstNode(rnInner)
      first = p.idx + 2   # the token after `[^`
    var j = first
    while p.tok[j].kind in {tkWord, tkOther} or
        validRefnamePunct(p.tok[j].symbol):
      label.add newLeaf(p.tok[j].symbol)
      inc j
    if j == first: return nil
    if p.tok[j].symbol != "]": return nil
    if reference:
      p.idx = j + 1  # skip ]
    elif p.tok[j+1].symbol == ":":
      p.idx = j + 2  # skip ]:
    else:
      return nil
    result = label
  proc parseFootnoteName(p: var RstParser, reference: bool): PRstNode =
    ## Parses a footnote label with the Markdown rules when `isMd(p)`;
    ## otherwise with the RST rules, provided that `[` starts inline markup
    ## here. Returns nil when there is no footnote label.
    if isMd(p):
      result = parseMdFootnoteName(p, reference)
    elif isInlineMarkupStart(p, "["):
      result = parseRstFootnoteName(p, reference)
    else:
      result = nil
  proc isMarkdownCodeBlock(p: RstParser, idx: int): bool =
    ## True when Markdown support is enabled and token `idx` is a fence: a
    ## punctuation/adornment token of at least 3 chars that starts with a
    ## backtick, or with `~` when `roPreferMarkdown` is set.
    let tok = p.tok[idx]
    result = false
    if roSupportMarkdown in p.s.options and
        tok.kind in {tkPunct, tkAdornment}:
      let fenceChar = tok.symbol[0]
      let allowed = fenceChar == '`' or
                    (roPreferMarkdown in p.s.options and fenceChar == '~')
      result = allowed and tok.symbol.len >= 3
  proc isMarkdownCodeBlock(p: RstParser): bool =
    ## Same check as the two-argument overload, applied to the current token.
    result = isMarkdownCodeBlock(p, p.idx)
  proc parseInline(p: var RstParser, father: PRstNode) =
    ## Parses one inline element at the current token and appends the
    ## resulting node to `father`. Punctuation is probed, in this order, for
    ## emphasis (`***`, `**`, `*`), inline targets, Markdown code blocks,
    ## inline literals, roles in prefix position, interpreted text,
    ## substitution references, footnote references and Markdown links;
    ## what is left is tried as a smiley and finally treated as plain
    ## punctuation. Indentation and end-of-input tokens are not consumed.
    template trySmiley() =
      # With smilies enabled: if one starts here, add it and leave the proc.
      if roSupportSmilies in p.s.options:
        let smiley = parseSmiley(p)
        if smiley != nil:
          father.add(smiley)
          return

    template addDelimited(node: PRstNode, postfix: string, backslash: bool) =
      # Fill `node` with everything up to `postfix` and attach it.
      let delimited = node
      parseUntil(p, delimited, postfix, backslash)
      father.add(delimited)

    var label: PRstNode  # to be used in `if` condition
    let startIdx = p.idx
    case currentTok(p).kind
    of tkPunct:
      if isInlineMarkupStart(p, "***"):
        addDelimited(newRstNode(rnTripleEmphasis), "***", true)
      elif isInlineMarkupStart(p, "**"):
        addDelimited(newRstNode(rnStrongEmphasis), "**", true)
      elif isInlineMarkupStart(p, "*"):
        addDelimited(newRstNode(rnEmphasis), "*", true)
      elif isInlineMarkupStart(p, "_`"):
        let target = newRstNode(rnInlineTarget)
        inc p.idx
        parseUntil(p, target, "`", false)
        target.anchor = rstnodeToRefname(target)
        addAnchorRst(p, name = linkName(target), target = target,
                     anchorType=manualInlineAnchor)
        father.add(target)
      elif isMarkdownCodeBlock(p):
        father.add(parseMarkdownCodeblock(p))
      elif isInlineMarkupStart(p, "``"):
        addDelimited(newRstNode(rnInlineLiteral), "``", false)
      elif match(p, p.idx, ":w:") and
          (var lastIdx = getRefnameIdx(p, p.idx + 1);
           p.tok[lastIdx+2].symbol == "`"):
        let (roleName, _) = getRefname(p, p.idx+1)
        let roleKind = whichRole(p, roleName)
        var roleNode = newRstNode(roleKind)
        p.idx = lastIdx + 2
        if roleKind == rnInlineCode:
          roleNode = roleNode.toInlineCode(language=roleName)
        parseUntil(p, roleNode, "`", false)  # bug #17260
        if roleKind in {rnUnknownRole, rnCodeFragment}:
          roleNode = roleNode.toOtherRole(roleKind, roleName)
        father.add(roleNode)
      elif isInlineMarkupStart(p, "`"):
        var interpreted = newRstNode(rnInterpretedText,
                                     info=lineInfo(p, p.idx+1))
        parseUntil(p, interpreted, "`", false)  # bug #17260
        interpreted = parsePostfix(p, interpreted)
        father.add(interpreted)
      elif isInlineMarkupStart(p, "|"):
        addDelimited(newRstNode(rnSubstitutionReferences,
                                info=lineInfo(p, p.idx+1)), "|", false)
      elif currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and
          (label = parseFootnoteName(p, reference=true); label != nil):
        let footnoteRef = newRstNode(rnFootnoteRef)
        footnoteRef.info = lineInfo(p, startIdx+1)
        footnoteRef.add label
        let (fnType, _) = getFootnoteType(p.s, label)
        case fnType
        of fnAutoSymbol:
          p.s.lineFootnoteSymRef.add lineInfo(p)
        of fnAutoNumber:
          p.s.lineFootnoteNumRef.add lineInfo(p)
        else: discard
        father.add(footnoteRef)
      elif roSupportMarkdown in p.s.options and
          currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and
          parseMarkdownLink(p, father):
        discard "parseMarkdownLink already processed it"
      else:
        trySmiley()
        parseBackslash(p, father)
    of tkWord:
      trySmiley()
      parseWordOrRef(p, father)
    of tkAdornment, tkOther, tkWhite:
      if isMarkdownCodeBlock(p):
        father.add(parseMarkdownCodeblock(p))
        return
      trySmiley()
      father.add(newLeaf(p))
      inc p.idx
    else: discard
  proc getDirective(p: var RstParser): string =
    ## Reads a directive name of the form ` name::`, the current token
    ## being the whitespace in front of it. On success returns the name in
    ## lower case and moves `p.idx` behind `::` and one optional whitespace
    ## token. Otherwise returns "" and leaves `p.idx` alone; a style warning
    ## is issued when the name is followed by `:` or by some other token
    ## beginning with `:`.
    result = ""
    if currentTok(p).kind != tkWhite: return
    let (name, lastIdx) = getRefname(p, p.idx + 1)
    if name.len == 0: return
    let after = p.tok[lastIdx + 1]
    if after.symbol == "::":
      p.idx = lastIdx + 2
      if currentTok(p).kind == tkWhite:
        inc p.idx
      elif currentTok(p).kind != tkIndent:
        rstMessage(p, mwRstStyle,
            "whitespace or newline expected after directive " & name)
      result = name.toLowerAscii()
    elif after.symbol == ":":
      rstMessage(p, mwRstStyle,
          "double colon :: may be missing at end of '" & name & "'",
          after.line, after.col)
    elif after.kind == tkPunct and after.symbol[0] == ':':
      rstMessage(p, mwRstStyle,
          "too many colons for a directive (should be ::)",
          after.line, after.col)
  proc parseComment(p: var RstParser, col: int): PRstNode =
    ## Skips a comment and always returns nil. If a line break directly
    ## follows the current token, only that token is skipped (empty
    ## comment). Otherwise every token positioned to the right of `col` is
    ## skipped: for line breaks the new indentation counts, for all other
    ## tokens their own column.
    result = nil
    if currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent:
      inc p.idx  # empty comment
      return
    while currentTok(p).kind != tkEof:
      let tok = currentTok(p)
      let position = if tok.kind == tkIndent: tok.ival else: tok.col
      if position <= col: break
      inc p.idx
  proc parseLine(p: var RstParser, father: PRstNode) =
    ## Parses inline elements into `father` for as long as the current
    ## token is whitespace, a word, "other" or punctuation; stops at
    ## anything else, adornments included.
    while currentTok(p).kind in {tkWhite, tkWord, tkOther, tkPunct}:
      parseInline(p, father)
  proc parseUntilNewline(p: var RstParser, father: PRstNode) =
    ## Parses inline elements into `father` up to the next line break or
    ## the end of input; unlike `parseLine`, adornment tokens are parsed too.
    while currentTok(p).kind notin {tkEof, tkIndent}:
      parseInline(p, father)
  # Forward declaration: `parseSection` is called by procs defined before
  # its body (e.g. `parseSectionWrapper` and `parseField` below).
  1805. proc parseSection(p: var RstParser, result: PRstNode) {.gcsafe.}
  proc tokenAfterNewline(p: RstParser, start: int): int =
    ## Scanning from `start`, returns the index of the token right behind
    ## the next line break, or the index of the end-of-input token if no
    ## line break comes first.
    result = start
    while p.tok[result].kind notin {tkEof, tkIndent}:
      inc result
    if p.tok[result].kind == tkIndent:
      inc result
  proc tokenAfterNewline(p: RstParser): int {.inline.} =
    ## Same as the two-argument overload, scanning from the current token.
    tokenAfterNewline(p, p.idx)
  proc getWrappableIndent(p: RstParser): int =
    ## Gets baseline indentation for bodies of field lists and directives.
    ## Handles situations like this (with possible de-indent in [case.3])::
    ##
    ##   :field:   definition                                        [case.1]
    ##
    ##   currInd   currentTok(p).col
    ##   |         |
    ##   v         v
    ##
    ##     .. Note:: defItem:                                        [case.2]
    ##                 definition
    ##
    ##                 ^
    ##                 |
    ##                 nextIndent
    ##
    ##     .. Note:: - point1                                        [case.3]
    ##         - point 2
    ##
    ##         ^
    ##         |
    ##         nextIndent
    if currentTok(p).kind == tkIndent:
      return currentTok(p).ival
    # indentation recorded in the token in front of the next line's start
    let nextIndent = p.tok[tokenAfterNewline(p)-1].ival
    let startCol = currentTok(p).col
    if nextIndent <= currInd(p) or nextIndent >= startCol:
      # parse only this line [case.1], or may be a definition list [case.2]
      result = startCol
    else:
      result = nextIndent  # allow parsing next lines [case.3]
  proc getMdBlockIndent(p: RstParser): int =
    ## Markdown version of `getWrappableIndent`: at a line break its
    ## indentation is returned; otherwise the indentation of the next line
    ## if that is deeper than the current one, else the column of the
    ## current token.
    if currentTok(p).kind == tkIndent:
      return currentTok(p).ival
    let nextIndent = p.tok[tokenAfterNewline(p)-1].ival
    # TODO: Markdown-compliant definition should allow nextIndent == currInd(p):
    result =
      if nextIndent <= currInd(p): currentTok(p).col  # parse only this line
      else: nextIndent                                # allow parsing next lines
  proc indFollows(p: RstParser): bool =
    ## True when the current token is a line break that goes deeper than
    ## the current indentation level.
    let tok = currentTok(p)
    result = tok.kind == tkIndent and tok.ival > currInd(p)
  proc parseBlockContent(p: var RstParser, father: var PRstNode,
                         contentParser: SectionParser): bool {.gcsafe.} =
    ## parse the final content part of explicit markup blocks (directives,
    ## footnotes, etc). Returns true if succeeded.
    ##
    ## Content is expected either on the current line or on following
    ## lines that are indented deeper; it is parsed by `contentParser` at
    ## the indentation given by `getWrappableIndent` and added to `father`.
    result = currentTok(p).kind != tkIndent or indFollows(p)
    if result:
      pushInd(p, getWrappableIndent(p))
      let content = contentParser(p)
      popInd(p)
      father.add content
  proc parseSectionWrapper(p: var RstParser): PRstNode =
    ## Parses a section into a fresh `rnInner` node, then unwraps it for as
    ## long as the node is an `rnInner` with exactly one son.
    var section = newRstNode(rnInner)
    parseSection(p, section)
    while section.kind == rnInner and section.len == 1:
      section = section.sons[0]
    result = section
  proc parseField(p: var RstParser): PRstNode =
    ## Returns a parsed rnField node.
    ##
    ## rnField nodes have two children nodes, a rnFieldName and a rnFieldBody.
    ## The body is parsed only when its indentation lies to the right of the
    ## column where the field started; otherwise it is left empty.
    result = newRstNode(rnField, info=lineInfo(p))
    let startCol = currentTok(p).col
    let name = newRstNode(rnFieldName)
    parseUntil(p, name, ":", false)
    if currentTok(p).kind == tkWhite: inc p.idx
    let body = newRstNode(rnFieldBody)
    let bodyIndent = getWrappableIndent(p)
    if bodyIndent > startCol:
      pushInd(p, bodyIndent)
      parseSection(p, body)
      popInd(p)
    result.add(name)
    result.add(body)
  proc parseFields(p: var RstParser): PRstNode =
    ## Parses fields for a section or directive block.
    ##
    ## This proc may return nil if the parsing doesn't find anything of value,
    ## otherwise it will return a node of rnFieldList type with children.
    ##
    ## A field list begins either at the very first token, when that is
    ## ":", or behind a line break that is followed by ":". Further fields
    ## must start at the same indentation.
    result = nil
    let atStart = p.idx == 0 and p.tok[0].symbol == ":"
    let listBegins = atStart or
        (currentTok(p).kind == tkIndent and nextTok(p).symbol == ":")
    if not listBegins: return
    let col = if atStart: currentTok(p).col else: currentTok(p).ival
    result = newRstNodeA(p, rnFieldList)
    if not atStart: inc p.idx  # skip the line break
    while true:
      result.add(parseField(p))
      let anotherField = currentTok(p).kind == tkIndent and
          currentTok(p).ival == col and nextTok(p).symbol == ":"
      if not anotherField: break
      inc p.idx
  proc getFieldValue*(n: PRstNode): string =
    ## Returns the value of a specific `rnField` node.
    ##
    ## This proc will assert if the node is not of the expected type. The
    ## empty string will be returned as a minimum. Any value in the rst will
    ## be stripped from leading/trailing whitespace.
    assert n.kind == rnField
    assert n.len == 2
    assert n.sons[0].kind == rnFieldName
    assert n.sons[1].kind == rnFieldBody
    let body = n.sons[1]
    result = strip(addNodes(body))
  proc getFieldValue(n: PRstNode, fieldname: string): string =
    ## Looks up the field `fieldname` (compared with `cmpIgnoreStyle`) in
    ## the field list stored as son 1 of `n` and returns the text of its
    ## body. A field that exists but is empty yields "\x01\x01"; a missing
    ## field, or a son 1 that is nil or no field list, yields "".
    let fields = n.sons[1]
    if fields == nil: return
    if fields.kind != rnFieldList:
      #InternalError("getFieldValue (2): " & $n.sons[1].kind)
      # We don't like internal errors here anymore as that would break the forum!
      return
    for field in fields.sons:
      if cmpIgnoreStyle(addNodes(field.sons[0]), fieldname) == 0:
        result = addNodes(field.sons[1])
        if result == "": result = "\x01\x01"  # indicates that the field exists
        return
  proc getArgument(n: PRstNode): string =
    ## Returns the text of son 0 of `n` (the argument), or "" when it is nil.
    let arg = n.sons[0]
    result = if arg == nil: "" else: addNodes(arg)
# Forward declaration: only the signature is given here, the body is not part
# of this declaration.
proc parseDotDot(p: var RstParser): PRstNode {.gcsafe.}
  1943. proc parseLiteralBlock(p: var RstParser): PRstNode =
  1944. result = newRstNodeA(p, rnLiteralBlock)
  1945. var n = newLeaf("")
  1946. if currentTok(p).kind == tkIndent:
  1947. var indent = currentTok(p).ival
  1948. while currentTok(p).kind == tkIndent: inc p.idx # skip blank lines
  1949. while true:
  1950. case currentTok(p).kind
  1951. of tkEof:
  1952. break
  1953. of tkIndent:
  1954. if currentTok(p).ival < indent:
  1955. break
  1956. else:
  1957. n.text.add("\n")
  1958. n.text.add(spaces(currentTok(p).ival - indent))
  1959. inc p.idx
  1960. else:
  1961. n.text.add(currentTok(p).symbol)
  1962. inc p.idx
  1963. else:
  1964. while currentTok(p).kind notin {tkIndent, tkEof}:
  1965. n.text.add(currentTok(p).symbol)
  1966. inc p.idx
  1967. result.add(n)
  1968. proc parseQuotedLiteralBlock(p: var RstParser): PRstNode =
  1969. result = newRstNodeA(p, rnLiteralBlock)
  1970. var n = newLeaf("")
  1971. if currentTok(p).kind == tkIndent:
  1972. var indent = currInd(p)
  1973. while currentTok(p).kind == tkIndent: inc p.idx # skip blank lines
  1974. var quoteSym = currentTok(p).symbol[0]
  1975. while true:
  1976. case currentTok(p).kind
  1977. of tkEof:
  1978. break
  1979. of tkIndent:
  1980. if currentTok(p).ival < indent:
  1981. break
  1982. elif currentTok(p).ival == indent:
  1983. if nextTok(p).kind == tkPunct and nextTok(p).symbol[0] == quoteSym:
  1984. n.text.add("\n")
  1985. inc p.idx
  1986. elif nextTok(p).kind == tkIndent:
  1987. break
  1988. else:
  1989. rstMessage(p, mwRstStyle, "no newline after quoted literal block")
  1990. break
  1991. else:
  1992. rstMessage(p, mwRstStyle,
  1993. "unexpected indentation in quoted literal block")
  1994. break
  1995. else:
  1996. n.text.add(currentTok(p).symbol)
  1997. inc p.idx
  1998. result.add(n)
  1999. proc parseRstLiteralBlock(p: var RstParser, kind: LiteralBlockKind): PRstNode =
  2000. if kind == lbIndentedLiteralBlock:
  2001. result = parseLiteralBlock(p)
  2002. else:
  2003. result = parseQuotedLiteralBlock(p)
  2004. proc getLevel(p: var RstParser, c: char, hasOverline: bool): int =
  2005. ## Returns (preliminary) heading level corresponding to `c` and
  2006. ## `hasOverline`. If level does not exist, add it first.
  2007. for i, hType in p.s.hLevels:
  2008. if hType.symbol == c and hType.hasOverline == hasOverline:
  2009. p.s.hLevels[i].line = curLine(p)
  2010. p.s.hLevels[i].hasPeers = true
  2011. return i
  2012. p.s.hLevels.add LevelInfo(symbol: c, hasOverline: hasOverline,
  2013. line: curLine(p), hasPeers: false)
  2014. result = p.s.hLevels.len - 1
  2015. proc countTitles(s: PRstSharedState, n: PRstNode) =
  2016. ## Fill `s.hTitleCnt`
  2017. if n == nil: return
  2018. for node in n.sons:
  2019. if node != nil:
  2020. if node.kind notin {rnOverline, rnSubstitutionDef, rnDefaultRole}:
  2021. break
  2022. if node.kind == rnOverline:
  2023. if s.hLevels[s.hTitleCnt].hasPeers:
  2024. break
  2025. inc s.hTitleCnt
  2026. if s.hTitleCnt >= 2:
  2027. break
proc isAdornmentHeadline(p: RstParser, adornmentIdx: int): bool =
  ## Checks that the underline/overline at token index `adornmentIdx` is long
  ## enough for the heading text. Lengths are measured in `symbol.len`, so
  ## there is no support for Unicode.
  ##
  ## Two situations are handled:
  ## * `p.idx < adornmentIdx`: the parser is at the heading text and
  ##   `adornmentIdx` is its underline;
  ## * otherwise the parser is at the overline itself, and both the overline
  ##   and the matching underline after the heading text are checked.
  ##
  ## When the check fails for something that does look like a heading, a
  ## `meNewSectionExpected` message describing the failure is emitted.
  if p.tok[adornmentIdx].symbol in ["::", "..", "|"]:
    return false
  if isMarkdownCodeBlock(p, adornmentIdx):
    return false
  var headlineLen = 0
  var failure = ""
  if p.idx < adornmentIdx:  # check for underline
    if p.idx > 0:
      # a positive difference means the text starts to the right of the
      # adornment, i.e. the heading text is indented
      headlineLen = currentTok(p).col - p.tok[adornmentIdx].col
    if headlineLen > 0:
      rstMessage(p, mwRstStyle, "indentation of heading text allowed" &
          " only for overline titles")
    for i in p.idx ..< adornmentIdx-1:  # adornmentIdx-1 is a linebreak
      headlineLen += p.tok[i].symbol.len
    result = p.tok[adornmentIdx].symbol.len >= headlineLen and headlineLen != 0
    if not result:
      failure = "(underline '" & p.tok[adornmentIdx].symbol & "' is too short)"
  else:  # p.idx == adornmentIdx, at overline. Check overline and underline
    var i = p.idx + 2  # index used as the start of the heading text
    headlineLen = p.tok[i].col - p.tok[adornmentIdx].col
    while p.tok[i].kind notin {tkEof, tkIndent}:
      headlineLen += p.tok[i].symbol.len
      inc i
    # the text line must be followed by an adornment that starts with the
    # same character as the overline
    if p.tok[i].kind == tkIndent and
        p.tok[i+1].kind == tkAdornment and
        p.tok[i+1].symbol[0] == p.tok[adornmentIdx].symbol[0]:
      result = p.tok[adornmentIdx].symbol.len >= headlineLen and
          headlineLen != 0
      if result:
        # long enough: additionally require underline == overline
        result = p.tok[i+1].symbol == p.tok[adornmentIdx].symbol
        if not result:
          failure = "(underline '" & p.tok[i+1].symbol & "' does not match " &
              "overline '" & p.tok[adornmentIdx].symbol & "')"
      else:
        failure = "(overline '" & p.tok[adornmentIdx].symbol & "' is too short)"
    else:  # it's not overline/underline section, not reporting error
      return false
  if not result:
    rstMessage(p, meNewSectionExpected, failure)
  2070. proc isLineBlock(p: RstParser): bool =
  2071. var j = tokenAfterNewline(p)
  2072. result = currentTok(p).col == p.tok[j].col and p.tok[j].symbol == "|" or
  2073. p.tok[j].col > currentTok(p).col or
  2074. p.tok[j].symbol == "\n"
  2075. proc isMarkdownBlockQuote(p: RstParser): bool =
  2076. result = currentTok(p).symbol[0] == '>'
  2077. proc whichRstLiteralBlock(p: RstParser): LiteralBlockKind =
  2078. ## Checks that the following tokens are either Indented Literal Block or
  2079. ## Quoted Literal Block (which is not quite the same as Markdown quote block).
  2080. ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#quoted-literal-blocks
  2081. if currentTok(p).symbol == "::" and nextTok(p).kind == tkIndent:
  2082. if currInd(p) > nextTok(p).ival:
  2083. result = lbNone
  2084. if currInd(p) < nextTok(p).ival:
  2085. result = lbIndentedLiteralBlock
  2086. elif currInd(p) == nextTok(p).ival:
  2087. var i = p.idx + 1
  2088. while p.tok[i].kind == tkIndent: inc i
  2089. const validQuotingCharacters = {
  2090. '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
  2091. '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^',
  2092. '_', '`', '{', '|', '}', '~'}
  2093. if p.tok[i].kind in {tkPunct, tkAdornment} and
  2094. p.tok[i].symbol[0] in validQuotingCharacters:
  2095. result = lbQuotedLiteralBlock
  2096. else:
  2097. result = lbNone
  2098. else:
  2099. result = lbNone
  2100. proc predNL(p: RstParser): bool =
  2101. result = true
  2102. if p.idx > 0:
  2103. result = prevTok(p).kind == tkIndent and
  2104. prevTok(p).ival == currInd(p)
  2105. proc isDefList(p: RstParser): bool =
  2106. var j = tokenAfterNewline(p)
  2107. result = currentTok(p).col < p.tok[j].col and
  2108. p.tok[j].kind in {tkWord, tkOther, tkPunct} and
  2109. p.tok[j - 2].symbol != "::"
  2110. proc `$`(t: Token): string = # for debugging only
  2111. result = "(" & $t.kind & " line=" & $t.line & " col=" & $t.col
  2112. if t.kind == tkIndent: result = result & " ival=" & $t.ival & ")"
  2113. else: result = result & " symbol=" & t.symbol & ")"
  2114. proc skipNewlines(p: RstParser, j: int): int =
  2115. result = j
  2116. while p.tok[result].kind != tkEof and p.tok[result].kind == tkIndent:
  2117. inc result # skip blank lines
  2118. proc skipNewlines(p: var RstParser) =
  2119. p.idx = skipNewlines(p, p.idx)
  2120. const maxMdRelInd = 3 ## In Markdown: maximum indentation that does not yet
  2121. ## make the indented block a code
  2122. proc isMdRelInd(outerInd, nestedInd: int): bool =
  2123. result = outerInd <= nestedInd and nestedInd <= outerInd + maxMdRelInd
  2124. proc isMdDefBody(p: RstParser, j: int, termCol: int): bool =
  2125. let defCol = p.tok[j].col
  2126. result = p.tok[j].symbol == ":" and
  2127. isMdRelInd(termCol, defCol) and
  2128. p.tok[j+1].kind == tkWhite and
  2129. p.tok[j+2].kind in {tkWord, tkOther, tkPunct}
  2130. proc isMdDefListItem(p: RstParser, idx: int): bool =
  2131. var j = tokenAfterNewline(p, idx)
  2132. j = skipNewlines(p, j)
  2133. let termCol = p.tok[j].col
  2134. result = isMdRelInd(currInd(p), termCol) and
  2135. isMdDefBody(p, j, termCol)
  2136. proc isOptionList(p: RstParser): bool =
  2137. result = match(p, p.idx, "-w") or match(p, p.idx, "--w") or
  2138. match(p, p.idx, "/w") or match(p, p.idx, "//w")
  2139. proc isMarkdownHeadlinePattern(s: string): bool =
  2140. if s.len >= 1 and s.len <= 6:
  2141. for c in s:
  2142. if c != '#': return false
  2143. result = true
  2144. proc isMarkdownHeadline(p: RstParser): bool =
  2145. if roSupportMarkdown in p.s.options:
  2146. if isMarkdownHeadlinePattern(currentTok(p).symbol) and nextTok(p).kind == tkWhite:
  2147. if p.tok[p.idx+2].kind in {tkWord, tkOther, tkPunct}:
  2148. result = true
  2149. proc findPipe(p: RstParser, start: int): bool =
  2150. var i = start
  2151. while true:
  2152. if p.tok[i].symbol == "|": return true
  2153. if p.tok[i].kind in {tkIndent, tkEof}: return false
  2154. inc i
proc whichSection(p: RstParser): RstNodeKind =
  ## Classifies the block that starts at the current token and returns the
  ## node kind it should be parsed as.
  ##
  ## The checks are tried in the order written, so earlier alternatives take
  ## priority over later ones; `rnParagraph` is the fallback for adornment,
  ## punctuation, word, other and whitespace tokens, `rnLeaf` for any other
  ## token kind. The only side effects are messages (e.g. for grid tables and
  ## for malformed headings reported by `isAdornmentHeadline`).
  if currentTok(p).kind in {tkAdornment, tkPunct}:
    # for punctuation sequences that can be both tkAdornment and tkPunct
    if isMarkdownCodeBlock(p):
      return rnCodeBlock
    elif isRst(p) and currentTok(p).symbol == "::":
      return rnLiteralBlock
    elif currentTok(p).symbol == ".." and
        nextTok(p).kind in {tkWhite, tkIndent}:
      return rnDirective
  case currentTok(p).kind
  of tkAdornment:
    # a transition additionally needs an adornment of at least 4 characters
    if match(p, p.idx + 1, "iI") and currentTok(p).symbol.len >= 4:
      result = rnTransition
    elif match(p, p.idx, "+a+"):
      # grid tables are recognized but reported as not implemented
      result = rnGridTable
      rstMessage(p, meGridTableNotImplemented)
    elif match(p, p.idx + 1, " a"): result = rnTable
    elif currentTok(p).symbol == "|" and isLineBlock(p):
      result = rnLineBlock
    elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p):
      result = rnMarkdownBlockQuote
    elif (match(p, p.idx + 1, "i") and not match(p, p.idx + 2, "I")) and
        isAdornmentHeadline(p, p.idx):
      result = rnOverline
    else:
      result = rnParagraph
  of tkPunct:
    if isMarkdownHeadline(p):
      result = rnMarkdownHeadline
    elif roSupportMarkdown in p.s.options and predNL(p) and
        match(p, p.idx, "| w") and findPipe(p, p.idx+3):
      result = rnMarkdownTable
    elif isMd(p) and isMdFootnoteName(p, reference=false):
      result = rnFootnote
    elif currentTok(p).symbol == "|" and isLineBlock(p):
      result = rnLineBlock
    elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p):
      result = rnMarkdownBlockQuote
    elif match(p, tokenAfterNewline(p), "aI") and
        isAdornmentHeadline(p, tokenAfterNewline(p)):
      # the next line is an adornment that is long enough => underlined heading
      result = rnHeadline
    elif currentTok(p).symbol in ["+", "*", "-"] and nextTok(p).kind == tkWhite:
      result = rnBulletList
    elif match(p, p.idx, ":w:E"):
      # (currentTok(p).symbol == ":")
      result = rnFieldList
    elif match(p, p.idx, "(e) ") or match(p, p.idx, "e) ") or
        match(p, p.idx, "e. "):
      result = rnEnumList
    elif isOptionList(p):
      result = rnOptionList
    elif isRst(p) and isDefList(p):
      result = rnDefList
    elif isMd(p) and isMdDefListItem(p, p.idx):
      result = rnMdDefList
    else:
      result = rnParagraph
  of tkWord, tkOther, tkWhite:
    let tokIdx = tokenAfterNewline(p)
    if match(p, tokIdx, "aI"):
      # text followed by an adornment line: a heading only if the adornment
      # passes the length check, otherwise plain paragraph text
      if isAdornmentHeadline(p, tokIdx): result = rnHeadline
      else: result = rnParagraph
    elif match(p, p.idx, "e) ") or match(p, p.idx, "e. "): result = rnEnumList
    elif isRst(p) and isDefList(p): result = rnDefList
    elif isMd(p) and isMdDefListItem(p, p.idx):
      result = rnMdDefList
    else: result = rnParagraph
  else: result = rnLeaf
  2224. proc parseLineBlock(p: var RstParser): PRstNode =
  2225. ## Returns rnLineBlock with all sons of type rnLineBlockItem
  2226. result = nil
  2227. if nextTok(p).kind in {tkWhite, tkIndent}:
  2228. var col = currentTok(p).col
  2229. result = newRstNodeA(p, rnLineBlock)
  2230. while true:
  2231. var item = newRstNode(rnLineBlockItem)
  2232. if nextTok(p).kind == tkWhite:
  2233. if nextTok(p).symbol.len > 1: # pass additional indentation after '| '
  2234. item.lineIndent = nextTok(p).symbol
  2235. inc p.idx, 2
  2236. pushInd(p, p.tok[p.idx].col)
  2237. parseSection(p, item)
  2238. popInd(p)
  2239. else: # tkIndent => add an empty line
  2240. item.lineIndent = "\n"
  2241. inc p.idx, 1
  2242. result.add(item)
  2243. if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
  2244. nextTok(p).symbol == "|" and
  2245. p.tok[p.idx + 2].kind in {tkWhite, tkIndent}:
  2246. inc p.idx, 1
  2247. else:
  2248. break
# Forward declaration: only the signature is given here, so that the procs
# below can call `parseDoc` (e.g. on a sub-parser).
proc parseDoc(p: var RstParser): PRstNode {.gcsafe.}
  2250. proc getQuoteSymbol(p: RstParser, idx: int): tuple[sym: string, depth: int, tokens: int] =
  2251. result = ("", 0, 0)
  2252. var i = idx
  2253. result.sym &= p.tok[i].symbol
  2254. result.depth += p.tok[i].symbol.len
  2255. inc result.tokens
  2256. inc i
  2257. while p.tok[i].kind == tkWhite and i+1 < p.tok.len and
  2258. p.tok[i+1].kind == tkPunct and p.tok[i+1].symbol[0] == '>':
  2259. result.sym &= p.tok[i].symbol
  2260. result.sym &= p.tok[i+1].symbol
  2261. result.depth += p.tok[i+1].symbol.len
  2262. inc result.tokens, 2
  2263. inc i, 2
  2264. proc parseMarkdownQuoteSegment(p: var RstParser, curSym: string, col: int):
  2265. PRstNode =
  2266. ## We define *segment* as a group of lines that starts with exactly the
  2267. ## same quote symbol. If the following lines don't contain any `>` (*lazy*
  2268. ## continuation) they considered as continuation of the current segment.
  2269. var q: RstParser # to delete `>` at a start of line and then parse normally
  2270. initParser(q, p.s)
  2271. q.col = p.col
  2272. q.line = p.line
  2273. var minCol = int.high # minimum colum num in the segment
  2274. while true: # move tokens of segment from `p` to `q` skipping `curSym`
  2275. case currentTok(p).kind
  2276. of tkEof:
  2277. break
  2278. of tkIndent:
  2279. if nextTok(p).kind in {tkIndent, tkEof}:
  2280. break
  2281. else:
  2282. if nextTok(p).symbol[0] == '>':
  2283. var (quoteSym, _, quoteTokens) = getQuoteSymbol(p, p.idx + 1)
  2284. if quoteSym == curSym: # the segment continues
  2285. var iTok = tokenAfterNewline(p, p.idx+1)
  2286. if p.tok[iTok].kind notin {tkEof, tkIndent} and
  2287. p.tok[iTok].symbol[0] != '>':
  2288. rstMessage(p, mwRstStyle,
  2289. "two or more quoted lines are followed by unquoted line " &
  2290. $(curLine(p) + 1))
  2291. break
  2292. q.tok.add currentTok(p)
  2293. var ival = currentTok(p).ival + quoteSym.len
  2294. inc p.idx, (1 + quoteTokens) # skip newline and > > >
  2295. if currentTok(p).kind == tkWhite:
  2296. ival += currentTok(p).symbol.len
  2297. inc p.idx
  2298. # fix up previous `tkIndent`s to ival (as if >>> were not there)
  2299. var j = q.tok.len - 1
  2300. while j >= 0 and q.tok[j].kind == tkIndent:
  2301. q.tok[j].ival = ival
  2302. dec j
  2303. else: # next segment started
  2304. break
  2305. elif currentTok(p).ival < col:
  2306. break
  2307. else: # the segment continues, a case like:
  2308. # > beginning
  2309. # continuation
  2310. q.tok.add currentTok(p)
  2311. inc p.idx
  2312. else:
  2313. if currentTok(p).col < minCol: minCol = currentTok(p).col
  2314. q.tok.add currentTok(p)
  2315. inc p.idx
  2316. q.indentStack = @[minCol]
  2317. # if initial indentation `minCol` is > 0 then final newlines
  2318. # should be omitted so that parseDoc could advance to the end of tokens:
  2319. var j = q.tok.len - 1
  2320. while q.tok[j].kind == tkIndent: dec j
  2321. q.tok.setLen (j+1)
  2322. q.tok.add Token(kind: tkEof, line: currentTok(p).line)
  2323. result = parseDoc(q)
  2324. proc parseMarkdownBlockQuote(p: var RstParser): PRstNode =
  2325. var (curSym, quotationDepth, quoteTokens) = getQuoteSymbol(p, p.idx)
  2326. let col = currentTok(p).col
  2327. result = newRstNodeA(p, rnMarkdownBlockQuote)
  2328. inc p.idx, quoteTokens # skip first >
  2329. while true:
  2330. var item = newRstNode(rnMarkdownBlockQuoteItem)
  2331. item.quotationDepth = quotationDepth
  2332. if currentTok(p).kind == tkWhite: inc p.idx
  2333. item.add parseMarkdownQuoteSegment(p, curSym, col)
  2334. result.add(item)
  2335. if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
  2336. nextTok(p).kind != tkEof and nextTok(p).symbol[0] == '>':
  2337. (curSym, quotationDepth, quoteTokens) = getQuoteSymbol(p, p.idx + 1)
  2338. inc p.idx, (1 + quoteTokens) # skip newline and > > >
  2339. else:
  2340. break
  2341. proc parseParagraph(p: var RstParser, result: PRstNode) =
  2342. while true:
  2343. case currentTok(p).kind
  2344. of tkIndent:
  2345. if nextTok(p).kind == tkIndent:
  2346. inc p.idx
  2347. break # blank line breaks paragraph for both Md & Rst
  2348. elif currentTok(p).ival == currInd(p) or (
  2349. isMd(p) and currentTok(p).ival > currInd(p)):
  2350. # (Md allows adding additional indentation inside paragraphs)
  2351. inc p.idx
  2352. case whichSection(p)
  2353. of rnParagraph, rnLeaf, rnHeadline, rnMarkdownHeadline,
  2354. rnOverline, rnDirective:
  2355. result.add newLeaf(" ")
  2356. of rnLineBlock:
  2357. result.addIfNotNil(parseLineBlock(p))
  2358. of rnMarkdownBlockQuote:
  2359. result.addIfNotNil(parseMarkdownBlockQuote(p))
  2360. else:
  2361. dec p.idx # allow subsequent block to be parsed as another section
  2362. break
  2363. else:
  2364. break
  2365. of tkPunct:
  2366. if isRst(p) and (
  2367. let literalBlockKind = whichRstLiteralBlock(p);
  2368. literalBlockKind != lbNone):
  2369. result.add newLeaf(":")
  2370. inc p.idx # skip '::'
  2371. result.add(parseRstLiteralBlock(p, literalBlockKind))
  2372. break
  2373. else:
  2374. parseInline(p, result)
  2375. of tkWhite, tkWord, tkAdornment, tkOther:
  2376. parseInline(p, result)
  2377. else: break
  2378. proc checkHeadingHierarchy(p: RstParser, lvl: int) =
  2379. if lvl - p.s.hCurLevel > 1: # broken hierarchy!
  2380. proc descr(l: int): string =
  2381. (if p.s.hLevels[l].hasOverline: "overline " else: "underline ") &
  2382. repeat(p.s.hLevels[l].symbol, 5)
  2383. var msg = "(section level inconsistent: "
  2384. msg.add descr(lvl) & " unexpectedly found, " &
  2385. "while the following intermediate section level(s) are missing on lines "
  2386. msg.add $p.s.hLevels[p.s.hCurLevel].line & ".." & $curLine(p) & ":"
  2387. for l in p.s.hCurLevel+1 .. lvl-1:
  2388. msg.add " " & descr(l)
  2389. if l != lvl-1: msg.add ","
  2390. rstMessage(p, meNewSectionExpected, msg & ")")
  2391. proc parseHeadline(p: var RstParser): PRstNode =
  2392. if isMarkdownHeadline(p):
  2393. result = newRstNode(rnMarkdownHeadline)
  2394. # Note that level hierarchy is not checked for markdown headings
  2395. result.level = currentTok(p).symbol.len
  2396. assert(nextTok(p).kind == tkWhite)
  2397. inc p.idx, 2
  2398. parseUntilNewline(p, result)
  2399. else:
  2400. result = newRstNode(rnHeadline)
  2401. parseUntilNewline(p, result)
  2402. assert(currentTok(p).kind == tkIndent)
  2403. assert(nextTok(p).kind == tkAdornment)
  2404. var c = nextTok(p).symbol[0]
  2405. inc p.idx, 2
  2406. result.level = getLevel(p, c, hasOverline=false)
  2407. checkHeadingHierarchy(p, result.level)
  2408. p.s.hCurLevel = result.level
  2409. addAnchorRst(p, linkName(result), result, anchorType=headlineAnchor)
  2410. p.s.tocPart.add result
  2411. proc parseOverline(p: var RstParser): PRstNode =
  2412. var c = currentTok(p).symbol[0]
  2413. inc p.idx, 2
  2414. result = newRstNode(rnOverline)
  2415. while true:
  2416. parseUntilNewline(p, result)
  2417. if currentTok(p).kind == tkIndent:
  2418. inc p.idx
  2419. if prevTok(p).ival > currInd(p):
  2420. result.add newLeaf(" ")
  2421. else:
  2422. break
  2423. else:
  2424. break
  2425. result.level = getLevel(p, c, hasOverline=true)
  2426. checkHeadingHierarchy(p, result.level)
  2427. p.s.hCurLevel = result.level
  2428. if currentTok(p).kind == tkAdornment:
  2429. inc p.idx
  2430. if currentTok(p).kind == tkIndent: inc p.idx
  2431. addAnchorRst(p, linkName(result), result, anchorType=headlineAnchor)
  2432. p.s.tocPart.add result
  2433. proc fixHeadlines(s: PRstSharedState) =
  2434. # Fix up section levels depending on presence of a title and subtitle:
  2435. for n in s.tocPart:
  2436. if n.kind in {rnHeadline, rnOverline}:
  2437. if s.hTitleCnt == 2:
  2438. if n.level == 1: # it's the subtitle
  2439. n.level = 0
  2440. elif n.level >= 2: # normal sections, start numbering from 1
  2441. n.level -= 1
  2442. elif s.hTitleCnt == 0:
  2443. n.level += 1
  2444. # Set headline anchors:
  2445. for iHeading in 0 .. s.tocPart.high:
  2446. let n: PRstNode = s.tocPart[iHeading]
  2447. if n.level >= 1:
  2448. n.anchor = rstnodeToRefname(n)
  2449. # Fix anchors for uniqueness if `.. contents::` is present
  2450. if s.hasToc:
  2451. # Find the last higher level section for unique reference name
  2452. var sectionPrefix = ""
  2453. for i in countdown(iHeading - 1, 0):
  2454. if s.tocPart[i].level >= 1 and s.tocPart[i].level < n.level:
  2455. sectionPrefix = rstnodeToRefname(s.tocPart[i]) & "-"
  2456. break
  2457. if sectionPrefix != "":
  2458. n.anchor = sectionPrefix & n.anchor
  2459. s.tocPart.setLen 0
  2460. type
  2461. ColSpec = object
  2462. start, stop: int
  2463. RstCols = seq[ColSpec]
  2464. ColumnLimits = tuple # for Markdown
  2465. first, last: int
  2466. ColSeq = seq[ColumnLimits]
  2467. proc tokStart(p: RstParser, idx: int): int =
  2468. result = p.tok[idx].col
  2469. proc tokStart(p: RstParser): int =
  2470. result = tokStart(p, p.idx)
  2471. proc tokEnd(p: RstParser, idx: int): int =
  2472. result = p.tok[idx].col + p.tok[idx].symbol.len - 1
  2473. proc tokEnd(p: RstParser): int =
  2474. result = tokEnd(p, p.idx)
  2475. proc getColumns(p: RstParser, cols: var RstCols, startIdx: int): int =
  2476. # Fills table column specification (or separator) `cols` and returns
  2477. # the next parser index after it.
  2478. var L = 0
  2479. result = startIdx
  2480. while true:
  2481. inc L
  2482. setLen(cols, L)
  2483. cols[L - 1].start = tokStart(p, result)
  2484. cols[L - 1].stop = tokEnd(p, result)
  2485. assert(p.tok[result].kind == tkAdornment)
  2486. inc result
  2487. if p.tok[result].kind != tkWhite: break
  2488. inc result
  2489. if p.tok[result].kind != tkAdornment: break
  2490. if p.tok[result].kind == tkIndent: inc result
  2491. proc checkColumns(p: RstParser, cols: RstCols) =
  2492. var i = p.idx
  2493. if p.tok[i].symbol[0] != '=':
  2494. stopOrWarn(p, meIllformedTable,
  2495. "only tables with `=` columns specification are allowed")
  2496. for col in 0 ..< cols.len:
  2497. if tokEnd(p, i) != cols[col].stop:
  2498. stopOrWarn(p, meIllformedTable,
  2499. "end of table column #$1 should end at position $2" % [
  2500. $(col+1), $(cols[col].stop+ColRstOffset)],
  2501. p.tok[i].line, tokEnd(p, i))
  2502. inc i
  2503. if col == cols.len - 1:
  2504. if p.tok[i].kind == tkWhite:
  2505. inc i
  2506. if p.tok[i].kind notin {tkIndent, tkEof}:
  2507. stopOrWarn(p, meIllformedTable, "extraneous column specification")
  2508. elif p.tok[i].kind == tkWhite:
  2509. inc i
  2510. else:
  2511. stopOrWarn(p, meIllformedTable,
  2512. "no enough table columns", p.tok[i].line, p.tok[i].col)
  2513. proc getSpans(p: RstParser, nextLine: int,
  2514. cols: RstCols, unitedCols: RstCols): seq[int] =
  2515. ## Calculates how many columns a joined cell occupies.
  2516. if unitedCols.len > 0:
  2517. result = newSeq[int](unitedCols.len)
  2518. var
  2519. iCell = 0
  2520. jCell = 0
  2521. uCell = 0
  2522. while jCell < cols.len:
  2523. if cols[jCell].stop < unitedCols[uCell].stop:
  2524. inc jCell
  2525. elif cols[jCell].stop == unitedCols[uCell].stop:
  2526. result[uCell] = jCell - iCell + 1
  2527. iCell = jCell + 1
  2528. jCell = jCell + 1
  2529. inc uCell
  2530. else:
  2531. rstMessage(p, meIllformedTable,
  2532. "spanning underline does not match main table columns",
  2533. p.tok[nextLine].line, p.tok[nextLine].col)
proc parseSimpleTableRow(p: var RstParser, cols: RstCols, colChar: char): PRstNode =
  ## Parses 1 row in RST simple table.
  ##
  ## `cols` is the main column specification of the table and `colChar` the
  ## first character of the table's column specification line. The result is
  ## an ``rnTableRow`` whose sons are ``rnTableDataCell`` nodes; the text of
  ## every cell is tokenized and parsed separately with `parseDoc`.
  # Consider that columns may be spanning (united by using underline like ----):
  let nextLine = tokenAfterNewline(p)
  var unitedCols: RstCols
  var afterSpan: int  # token index to continue from after the row
  if p.tok[nextLine].kind == tkAdornment and p.tok[nextLine].symbol[0] == '-':
    afterSpan = getColumns(p, unitedCols, nextLine)
    if unitedCols == cols and p.tok[nextLine].symbol[0] == colChar:
      # legacy rst.nim compat.: allow punctuation like `----` in main boundaries
      afterSpan = nextLine
      unitedCols.setLen 0
  else:
    afterSpan = nextLine
  # Column limits of cell `i`: taken from the spanning specification when
  # there is one, otherwise from the main columns.
  template colEnd(i): int =
    if i == cols.len - 1: high(int)  # last column has no limit
    elif unitedCols.len > 0: unitedCols[i].stop else: cols[i].stop
  template colStart(i): int =
    if unitedCols.len > 0: unitedCols[i].start else: cols[i].start
  # accumulated source text of every cell of this row
  var row = newSeq[string](if unitedCols.len > 0: unitedCols.len else: cols.len)
  var spans: seq[int] = getSpans(p, nextLine, cols, unitedCols)
  let line = currentTok(p).line
  # Iterate over the lines a single cell may span:
  while true:
    var nCell = 0
    # distribute tokens between cells in the current line:
    while currentTok(p).kind notin {tkIndent, tkEof}:
      if tokEnd(p) <= colEnd(nCell):
        if tokStart(p) < colStart(nCell):
          # token begins before the cell: whitespace is dropped silently,
          # anything else is reported and then still added to the cell
          if currentTok(p).kind != tkWhite:
            stopOrWarn(p, meIllformedTable,
                       "this word crosses table column from the left")
            row[nCell].add(currentTok(p).symbol)
        else:
          row[nCell].add(currentTok(p).symbol)
        inc p.idx
      else:
        # token ends beyond the cell: either it straddles the boundary
        # (reported, kept in this cell) or it belongs to the next cell
        if tokStart(p) < colEnd(nCell) and currentTok(p).kind != tkWhite:
          stopOrWarn(p, meIllformedTable,
                     "this word crosses table column from the right")
          row[nCell].add(currentTok(p).symbol)
          inc p.idx
        inc nCell
    if currentTok(p).kind == tkIndent: inc p.idx
    # a token within the first column on the next line starts a new row
    if tokEnd(p) <= colEnd(0): break
    # Continued current cells because the 1st column is empty.
    if currentTok(p).kind in {tkEof, tkAdornment}:
      break
    for nCell in countup(1, high(row)): row[nCell].add('\n')
  result = newRstNode(rnTableRow)
  var q: RstParser  # sub-parser, re-initialized for every cell
  for uCell in 0 ..< row.len:
    initParser(q, p.s)
    q.col = colStart(uCell)
    q.line = line - 1
    getTokens(row[uCell], q.tok)
    let cell = newRstNode(rnTableDataCell)
    cell.span = if spans.len == 0: 0 else: spans[uCell]
    cell.add(parseDoc(q))
    result.add(cell)
  # skip the spanning underline if the parser has not passed it yet
  if afterSpan > p.idx:
    p.idx = afterSpan
  2596. proc parseSimpleTable(p: var RstParser): PRstNode =
  2597. var cols: RstCols
  2598. result = newRstNodeA(p, rnTable)
  2599. let startIdx = getColumns(p, cols, p.idx)
  2600. let colChar = currentTok(p).symbol[0]
  2601. checkColumns(p, cols)
  2602. p.idx = startIdx
  2603. result.colCount = cols.len
  2604. while true:
  2605. if currentTok(p).kind == tkAdornment:
  2606. checkColumns(p, cols)
  2607. p.idx = tokenAfterNewline(p)
  2608. if currentTok(p).kind in {tkEof, tkIndent}:
  2609. # skip last adornment line:
  2610. break
  2611. if result.sons.len > 0: result.sons[^1].endsHeader = true
  2612. # fix rnTableDataCell -> rnTableHeaderCell for previous table rows:
  2613. for nRow in 0 ..< result.sons.len:
  2614. for nCell in 0 ..< result.sons[nRow].len:
  2615. template cell: PRstNode = result.sons[nRow].sons[nCell]
  2616. cell = PRstNode(kind: rnTableHeaderCell, sons: cell.sons,
  2617. span: cell.span, anchor: cell.anchor)
  2618. if currentTok(p).kind == tkEof: break
  2619. let tabRow = parseSimpleTableRow(p, cols, colChar)
  2620. result.add tabRow
  2621. proc readTableRow(p: var RstParser): ColSeq =
  2622. if currentTok(p).symbol == "|": inc p.idx
  2623. while currentTok(p).kind notin {tkIndent, tkEof}:
  2624. var limits: ColumnLimits
  2625. limits.first = p.idx
  2626. while currentTok(p).kind notin {tkIndent, tkEof}:
  2627. if currentTok(p).symbol == "|" and prevTok(p).symbol != "\\": break
  2628. inc p.idx
  2629. limits.last = p.idx
  2630. result.add(limits)
  2631. if currentTok(p).kind in {tkIndent, tkEof}: break
  2632. inc p.idx
  2633. p.idx = tokenAfterNewline(p)
  2634. proc getColContents(p: var RstParser, colLim: ColumnLimits): string =
  2635. for i in colLim.first ..< colLim.last:
  2636. result.add(p.tok[i].symbol)
  2637. result.strip
  2638. proc isValidDelimiterRow(p: var RstParser, colNum: int): bool =
  2639. let row = readTableRow(p)
  2640. if row.len != colNum: return false
  2641. for limits in row:
  2642. let content = getColContents(p, limits)
  2643. if content.len < 3 or not (content.startsWith("--") or content.startsWith(":-")):
  2644. return false
  2645. return true
  2646. proc parseMarkdownTable(p: var RstParser): PRstNode =
  2647. var
  2648. row: ColSeq
  2649. a, b: PRstNode
  2650. q: RstParser
  2651. result = newRstNodeA(p, rnMarkdownTable)
  2652. proc parseRow(p: var RstParser, cellKind: RstNodeKind, result: PRstNode) =
  2653. row = readTableRow(p)
  2654. if result.colCount == 0: result.colCount = row.len # table header
  2655. elif row.len < result.colCount: row.setLen(result.colCount)
  2656. a = newRstNode(rnTableRow)
  2657. for j in 0 ..< result.colCount:
  2658. b = newRstNode(cellKind)
  2659. initParser(q, p.s)
  2660. q.col = p.col
  2661. q.line = currentTok(p).line - 1
  2662. getTokens(getColContents(p, row[j]), q.tok)
  2663. b.add(parseDoc(q))
  2664. a.add(b)
  2665. result.add(a)
  2666. parseRow(p, rnTableHeaderCell, result)
  2667. if not isValidDelimiterRow(p, result.colCount):
  2668. rstMessage(p, meMarkdownIllformedTable)
  2669. while predNL(p) and currentTok(p).symbol == "|":
  2670. parseRow(p, rnTableDataCell, result)
  2671. proc parseTransition(p: var RstParser): PRstNode =
  2672. result = newRstNodeA(p, rnTransition)
  2673. inc p.idx
  2674. if currentTok(p).kind == tkIndent: inc p.idx
  2675. if currentTok(p).kind == tkIndent: inc p.idx
  2676. proc parseBulletList(p: var RstParser): PRstNode =
  2677. result = nil
  2678. if nextTok(p).kind == tkWhite:
  2679. var bullet = currentTok(p).symbol
  2680. var col = currentTok(p).col
  2681. result = newRstNodeA(p, rnBulletList)
  2682. pushInd(p, p.tok[p.idx + 2].col)
  2683. inc p.idx, 2
  2684. while true:
  2685. var item = newRstNode(rnBulletItem)
  2686. parseSection(p, item)
  2687. result.add(item)
  2688. if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
  2689. nextTok(p).symbol == bullet and
  2690. p.tok[p.idx + 2].kind == tkWhite:
  2691. inc p.idx, 3
  2692. else:
  2693. break
  2694. popInd(p)
proc parseOptionList(p: var RstParser): PRstNode =
  ## Parses consecutive option-list items into an `rnOptionList` node.
  ##
  ## Every item becomes an `rnOptionListItem` with two sons: the
  ## `rnOptionGroup` (tokens of the option itself) and the `rnDescription`.
  ## Items are numbered through their `order` field, starting at 1.
  ## Parsing stops at the first position that is not in the starting column
  ## or for which `isOptionList` is false.
  result = newRstNodeA(p, rnOptionList)
  let col = currentTok(p).col
  var order = 1
  while true:
    if currentTok(p).col == col and isOptionList(p):
      var a = newRstNode(rnOptionGroup)
      var b = newRstNode(rnDescription)
      var c = newRstNode(rnOptionListItem)
      # one token is skipped when the "//w" pattern matches here
      if match(p, p.idx, "//w"): inc p.idx
      # Collect the option tokens up to the end of the line. A whitespace
      # token longer than one character separates the option from its
      # description; it is consumed and ends the group.
      while currentTok(p).kind notin {tkIndent, tkEof}:
        if currentTok(p).kind == tkWhite and currentTok(p).symbol.len > 1:
          inc p.idx
          break
        a.add(newLeaf(p))
        inc p.idx
      var j = tokenAfterNewline(p)
      if j > 0 and p.tok[j - 1].kind == tkIndent and p.tok[j - 1].ival > currInd(p):
        # the following line is indented deeper than the current level:
        # the description is a whole section at that indentation
        pushInd(p, p.tok[j - 1].ival)
        parseSection(p, b)
        popInd(p)
      else:
        # otherwise the description is only the rest of this line
        parseLine(p, b)
      while currentTok(p).kind == tkIndent: inc p.idx
      c.add(a)
      c.add(b)
      c.order = order; inc order
      result.add(c)
    else:
      if currentTok(p).kind != tkEof: dec p.idx # back to tkIndent
      break
proc parseMdDefinitionList(p: var RstParser): PRstNode =
  ## Parses (Pandoc/kramdown/PHPextra) Markdown definition lists.
  ##
  ## Returns an `rnMdDefList` node. Each son is an `rnDefItem` whose first
  ## son is the term (`rnDefName`) followed by one `rnDefBody` per
  ## definition of that term.
  result = newRstNodeA(p, rnMdDefList)
  let termCol = currentTok(p).col
  while true:
    var item = newRstNode(rnDefItem)
    var term = newRstNode(rnDefName)
    parseLine(p, term)
    skipNewlines(p)
    inc p.idx, 2 # skip ":" and space
    item.add(term)
    # inner loop: all definition bodies that belong to the current term
    while true:
      var def = newRstNode(rnDefBody)
      let indent = getMdBlockIndent(p)
      pushInd(p, indent)
      parseSection(p, def)
      popInd(p)
      item.add(def)
      # look ahead without moving `p.idx` until we know what follows
      let j = skipNewlines(p, p.idx)
      if isMdDefBody(p, j, termCol): # parse next definition body
        p.idx = j + 2 # skip ":" and space
      else:
        break
    result.add(item)
    let j = skipNewlines(p, p.idx)
    # a new item must start in the column of the first term
    if p.tok[j].col == termCol and isMdDefListItem(p, j):
      p.idx = j # parse next item
    else:
      break
proc parseDefinitionList(p: var RstParser): PRstNode =
  ## Tries to parse an RST definition list starting at the current token.
  ##
  ## Returns an `rnDefList` node whose sons are `rnDefItem` nodes (each with
  ## an `rnDefName` and an `rnDefBody` son), or nil when the input at this
  ## position is not a definition list or no item could be parsed.
  result = nil
  var j = tokenAfterNewline(p) - 1
  # A definition list requires the next line to be indented deeper than the
  # current level, and the current line must not end with "::".
  if j >= 1 and p.tok[j].kind == tkIndent and
      p.tok[j].ival > currInd(p) and p.tok[j - 1].symbol != "::":
    var col = currentTok(p).col
    result = newRstNodeA(p, rnDefList)
    while true:
      if isOptionList(p):
        break # option list has priority over def.list
      j = p.idx  # remember the start of the term so we can backtrack
      var a = newRstNode(rnDefName)
      parseLine(p, a)
      if currentTok(p).kind == tkIndent and
          currentTok(p).ival > currInd(p) and
          nextTok(p).symbol != "::" and
          nextTok(p).kind notin {tkIndent, tkEof}:
        pushInd(p, currentTok(p).ival)
        var b = newRstNode(rnDefBody)
        parseSection(p, b)
        var c = newRstNode(rnDefItem)
        c.add(a)
        c.add(b)
        result.add(c)
        popInd(p)
      else:
        # no indented body follows: restore the position saved before the
        # term was parsed and stop
        p.idx = j
        break
      if currentTok(p).kind == tkIndent and currentTok(p).ival == col:
        inc p.idx
        j = tokenAfterNewline(p) - 1
        # go on only when the next line again looks like "term + indented
        # body"; otherwise leave the loop
        if j >= 1 and p.tok[j].kind == tkIndent and p.tok[j].ival > col and
            p.tok[j-1].symbol != "::" and p.tok[j+1].kind != tkIndent:
          discard
        else:
          break
    # an empty list is reported to the caller as "nothing parsed"
    if result.len == 0: result = nil
proc parseEnumList(p: var RstParser): PRstNode =
  ## Parses an enumerated list starting at the current token.
  ##
  ## Returns an `rnEnumList` node with one `rnEnumItem` son per item, or nil
  ## when the line after the first enumerator is not indented enough (a
  ## `mwRstStyle` message is reported in that case). The format of the first
  ## enumerator is stored in `result.labelFmt`. Items are accepted only
  ## while their enumerators follow each other in sequence.
  const
    wildcards: array[0..5, string] = ["(n) ", "n) ", "n. ",
                                      "(x) ", "x) ", "x. "]
      # enumerator patterns, where 'x' means letter and 'n' means number
    wildToken: array[0..5, int] = [4, 3, 3, 4, 3, 3] # number of tokens
    wildIndex: array[0..5, int] = [1, 0, 0, 1, 0, 0]
      # position of enumeration sequence (number/letter) in enumerator
  let col = currentTok(p).col
  # find the enumerator pattern matching at the current position
  var w = 0
  while w < wildcards.len:
    if match(p, p.idx, wildcards[w]): break
    inc w
  assert w < wildcards.len

  proc checkAfterNewline(p: RstParser, report: bool): bool =
    ## If no indentation on the next line then parse as a normal paragraph
    ## according to the RST spec. And report a warning with suggestions
    ##
    ## Returns false when the next line is under-indented; the warning is
    ## emitted only if `report` is true.
    let j = tokenAfterNewline(p, start=p.idx+1)
    # column of the first token after the enumerator
    let requiredIndent = p.tok[p.idx+wildToken[w]].col
    if p.tok[j].kind notin {tkIndent, tkEof} and
        p.tok[j].col < requiredIndent and
        (p.tok[j].col > col or
          (p.tok[j].col == col and not match(p, j, wildcards[w]))):
      if report:
        let n = p.line + p.tok[j].line
        # NOTE(review): `dedent` keeps relative indentation, so the extra
        # indentation of the two continuation lines below ends up in the
        # message. The copy under review had all leading whitespace stripped;
        # confirm the exact indentation against the pristine file.
        let msg = "\n" & """
          not enough indentation on line $2
            (should be at column $3 if it's a continuation of enum. list),
          or no blank line after line $1 (if it should be the next paragraph),
          or no escaping \ at the beginning of line $1
            (if lines $1..$2 are a normal paragraph, not enum. list)""".dedent
        let c = p.col + requiredIndent + ColRstOffset
        rstMessage(p, mwRstStyle, msg % [$(n-1), $n, $c],
                   p.tok[j].line, p.tok[j].col)
      result = false
    else:
      result = true

  if not checkAfterNewline(p, report = true):
    return nil
  result = newRstNodeA(p, rnEnumList)
  let autoEnums = if roSupportMarkdown in p.s.options: @["#", "1"] else: @["#"]
  var prevAE = "" # so as not allow mixing auto-enumerators `1` and `#`
  var curEnum = 1
  for i in 0 ..< wildToken[w]-1: # add first enumerator with (, ), and .
    if p.tok[p.idx + i].symbol == "#":
      prevAE = "#"
      result.labelFmt.add "1"
    else:
      result.labelFmt.add p.tok[p.idx + i].symbol
  var prevEnum = p.tok[p.idx + wildIndex[w]].symbol
  inc p.idx, wildToken[w]
  while true:
    var item = newRstNode(rnEnumItem)
    # the item body is indented to the column right after the enumerator
    pushInd(p, currentTok(p).col)
    parseSection(p, item)
    popInd(p)
    result.add(item)
    if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
        match(p, p.idx+1, wildcards[w]):
      # don't report to avoid duplication of warning since for
      # subsequent enum. items parseEnumList will be called second time:
      if not checkAfterNewline(p, report = false):
        break
      let enumerator = p.tok[p.idx + 1 + wildIndex[w]].symbol
      # check that it's in sequence: enumerator == next(prevEnum)
      if "n" in wildcards[w]: # arabic numeral
        # a previous enumerator that is not a number counts as 1
        let prevEnumI = try: parseInt(prevEnum) except ValueError: 1
        if enumerator in autoEnums:
          if prevAE != "" and enumerator != prevAE:
            break
          prevAE = enumerator
          curEnum = prevEnumI + 1
        else: curEnum = (try: parseInt(enumerator) except ValueError: 1)
        if curEnum - prevEnumI != 1:
          break
        prevEnum = enumerator
      else: # a..z
        let prevEnumI = ord(prevEnum[0])
        if enumerator == "#": curEnum = prevEnumI + 1
        else: curEnum = ord(enumerator[0])
        if curEnum - prevEnumI != 1:
          break
        prevEnum = $chr(curEnum)
      inc p.idx, 1 + wildToken[w]  # skip the indent and the enumerator
    else:
      break
  2878. proc prefix(ftnType: FootnoteType): string =
  2879. case ftnType
  2880. of fnManualNumber: result = "footnote-"
  2881. of fnAutoNumber: result = "footnoteauto-"
  2882. of fnAutoNumberLabel: result = "footnote-"
  2883. of fnAutoSymbol: result = "footnotesym-"
  2884. of fnCitation: result = "citation-"
  2885. proc parseFootnote(p: var RstParser): PRstNode {.gcsafe.} =
  2886. ## Parses footnotes and citations, always returns 2 sons:
  2887. ##
  2888. ## 1) footnote label, always containing rnInner with 1 or more sons
  2889. ## 2) footnote body, which may be nil
  2890. var label: PRstNode
  2891. if isRst(p):
  2892. inc p.idx # skip space after `..`
  2893. label = parseFootnoteName(p, reference=false)
  2894. if label == nil:
  2895. if isRst(p):
  2896. dec p.idx
  2897. return nil
  2898. result = newRstNode(rnFootnote)
  2899. result.add label
  2900. let (fnType, i) = getFootnoteType(p.s, label)
  2901. var name = ""
  2902. var anchor = fnType.prefix
  2903. case fnType
  2904. of fnManualNumber:
  2905. addFootnoteNumManual(p, i)
  2906. anchor.add $i
  2907. of fnAutoNumber, fnAutoNumberLabel:
  2908. name = rstnodeToRefname(label)
  2909. addFootnoteNumAuto(p, name)
  2910. if fnType == fnAutoNumberLabel:
  2911. anchor.add name
  2912. else: # fnAutoNumber
  2913. result.order = p.s.lineFootnoteNum.len
  2914. anchor.add $result.order
  2915. of fnAutoSymbol:
  2916. addFootnoteSymAuto(p)
  2917. result.order = p.s.lineFootnoteSym.len
  2918. anchor.add $p.s.lineFootnoteSym.len
  2919. of fnCitation:
  2920. anchor.add rstnodeToRefname(label)
  2921. addAnchorRst(p, anchor, target = result, anchorType = footnoteAnchor)
  2922. result.anchor = anchor
  2923. if currentTok(p).kind == tkWhite: inc p.idx
  2924. discard parseBlockContent(p, result, parseSectionWrapper)
  2925. if result.len < 2:
  2926. result.add nil
  2927. proc sonKind(father: PRstNode, i: int): RstNodeKind =
  2928. result = rnLeaf
  2929. if i < father.len: result = father.sons[i].kind
proc parseSection(p: var RstParser, result: PRstNode) =
  ## parse top-level RST elements: sections, transitions and body elements.
  ##
  ## Parsed elements are appended to `result` (an ordinary parameter here,
  ## not the implicit return variable). Parsing goes on until end of input
  ## or until a line indented less than the current indentation level is
  ## met.
  while true:
    var leave = false
    assert(p.idx >= 0)
    # First deal with indentation tokens in front of the next element.
    while currentTok(p).kind == tkIndent:
      if currInd(p) == currentTok(p).ival:
        # same level: just step over the indent
        inc p.idx
      elif currentTok(p).ival > currInd(p):
        if roPreferMarkdown in p.s.options: # Markdown => normal paragraphs
          # 4 or more extra columns start a literal block, anything less is
          # parsed as nested content of the same parent
          if currentTok(p).ival - currInd(p) >= 4:
            result.add parseLiteralBlock(p)
          else:
            pushInd(p, currentTok(p).ival)
            parseSection(p, result)
            popInd(p)
        else: # RST mode => block quotes
          pushInd(p, currentTok(p).ival)
          var a = newRstNodeA(p, rnBlockQuote)
          parseSection(p, a)
          result.add(a)
          popInd(p)
      else:
        # dedent: this section is finished
        while currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent:
          inc p.idx # skip blank lines
        leave = true
        break
    if leave or currentTok(p).kind == tkEof: break
    var a: PRstNode = nil
    var k = whichSection(p)
    # dispatch to the parser of the detected element kind
    case k
    of rnLiteralBlock:
      inc p.idx # skip '::'
      a = parseLiteralBlock(p)
    of rnBulletList: a = parseBulletList(p)
    of rnLineBlock: a = parseLineBlock(p)
    of rnMarkdownBlockQuote: a = parseMarkdownBlockQuote(p)
    of rnDirective: a = parseDotDot(p)
    of rnFootnote: a = parseFootnote(p)
    of rnEnumList: a = parseEnumList(p)
    of rnLeaf: rstMessage(p, meNewSectionExpected, "(syntax error)")
    of rnParagraph: discard
    of rnDefList: a = parseDefinitionList(p)
    of rnMdDefList: a = parseMdDefinitionList(p)
    of rnFieldList:
      if p.idx > 0: dec p.idx
      a = parseFields(p)
    of rnTransition: a = parseTransition(p)
    of rnHeadline, rnMarkdownHeadline: a = parseHeadline(p)
    of rnOverline: a = parseOverline(p)
    of rnTable: a = parseSimpleTable(p)
    of rnMarkdownTable: a = parseMarkdownTable(p)
    of rnOptionList: a = parseOptionList(p)
    else:
      #InternalError("rst.parseSection()")
      discard
    # Whenever a specialised parser produced nothing, fall back to a plain
    # paragraph -- except after `parseDotDot`, whose nil result is kept.
    if a == nil and k != rnDirective:
      a = newRstNodeA(p, rnParagraph)
      parseParagraph(p, a)
    result.addIfNotNil(a)
  # A leading paragraph that is not followed by another paragraph is
  # replaced by an `rnInner` node with the same sons and anchor.
  if sonKind(result, 0) == rnParagraph and sonKind(result, 1) != rnParagraph:
    result.sons[0] = newRstNode(rnInner, result.sons[0].sons,
                                anchor=result.sons[0].anchor)
  2993. proc parseDoc(p: var RstParser): PRstNode =
  2994. result = parseSectionWrapper(p)
  2995. if currentTok(p).kind != tkEof:
  2996. rstMessage(p, meGeneralParseError)
type
  DirFlag = enum  ## switches telling `parseDirective` what to parse
    hasArg,       ## the directive takes an argument (stored as `rnDirArg`)
    hasOptions,   ## an indented field list with options may follow
    argIsFile,    ## the argument is gathered from consecutive word,
                  ## punctuation, adornment and other non-blank tokens
    argIsWord     ## the argument is a single word token; if there is none
                  ## the argument node is nil
  DirFlags = set[DirFlag]  ## combination of `DirFlag` values
  3001. proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags): PRstNode =
  3002. ## Parses arguments and options for a directive block.
  3003. ##
  3004. ## A directive block will always have three sons: the arguments for the
  3005. ## directive (rnDirArg), the options (rnFieldList) and the directive
  3006. ## content block. This proc parses the two first nodes, the 3rd is left to
  3007. ## the outer `parseDirective` call.
  3008. ##
  3009. ## Both rnDirArg and rnFieldList children nodes might be nil, so you need to
  3010. ## check them before accessing.
  3011. result = newRstNodeA(p, k)
  3012. if k == rnCodeBlock: result.info = lineInfo(p)
  3013. var args: PRstNode = nil
  3014. var options: PRstNode = nil
  3015. if hasArg in flags:
  3016. args = newRstNode(rnDirArg)
  3017. if argIsFile in flags:
  3018. while true:
  3019. case currentTok(p).kind
  3020. of tkWord, tkOther, tkPunct, tkAdornment:
  3021. args.add(newLeaf(p))
  3022. inc p.idx
  3023. else: break
  3024. elif argIsWord in flags:
  3025. while currentTok(p).kind == tkWhite: inc p.idx
  3026. if currentTok(p).kind == tkWord:
  3027. args.add(newLeaf(p))
  3028. inc p.idx
  3029. else:
  3030. args = nil
  3031. else:
  3032. parseLine(p, args)
  3033. result.add(args)
  3034. if hasOptions in flags:
  3035. if currentTok(p).kind == tkIndent and currentTok(p).ival > currInd(p) and
  3036. nextTok(p).symbol == ":":
  3037. pushInd(p, currentTok(p).ival)
  3038. options = parseFields(p)
  3039. popInd(p)
  3040. result.add(options)
  3041. proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags,
  3042. contentParser: SectionParser): PRstNode =
  3043. ## A helper proc that does main work for specific directive procs.
  3044. ## Always returns a generic rnDirective tree with these 3 children:
  3045. ##
  3046. ## 1) rnDirArg
  3047. ## 2) rnFieldList
  3048. ## 3) a node returned by `contentParser`.
  3049. ##
  3050. ## .. warning:: Any of the 3 children may be nil.
  3051. result = parseDirective(p, k, flags)
  3052. if not isNil(contentParser) and
  3053. parseBlockContent(p, result, contentParser):
  3054. discard "result is updated by parseBlockContent"
  3055. else:
  3056. result.add(PRstNode(nil))
  3057. proc parseDirBody(p: var RstParser, contentParser: SectionParser): PRstNode =
  3058. if indFollows(p):
  3059. pushInd(p, currentTok(p).ival)
  3060. result = contentParser(p)
  3061. popInd(p)
  3062. proc dirInclude(p: var RstParser): PRstNode =
  3063. ##
  3064. ## The following options are recognized:
  3065. ##
  3066. ## :start-after: text to find in the external data file
  3067. ##
  3068. ## Only the content after the first occurrence of the specified
  3069. ## text will be included. If text is not found inclusion will
  3070. ## start from beginning of the file
  3071. ##
  3072. ## :end-before: text to find in the external data file
  3073. ##
  3074. ## Only the content before the first occurrence of the specified
  3075. ## text (but after any after text) will be included. If text is
  3076. ## not found inclusion will happen until the end of the file.
  3077. #literal : flag (empty)
  3078. # The entire included text is inserted into the document as a single
  3079. # literal block (useful for program listings).
  3080. #encoding : name of text encoding
  3081. # The text encoding of the external data file. Defaults to the document's
  3082. # encoding (if specified).
  3083. #
  3084. result = nil
  3085. var n = parseDirective(p, rnDirective, {hasArg, argIsFile, hasOptions}, nil)
  3086. var filename = strip(addNodes(n.sons[0]))
  3087. var path = p.findRelativeFile(filename)
  3088. if path == "":
  3089. rstMessage(p, meCannotOpenFile, filename)
  3090. else:
  3091. # XXX: error handling; recursive file inclusion!
  3092. if getFieldValue(n, "literal") != "":
  3093. result = newRstNode(rnLiteralBlock)
  3094. result.add newLeaf(readFile(path))
  3095. else:
  3096. let inputString = readFile(path)
  3097. let startPosition =
  3098. block:
  3099. let searchFor = n.getFieldValue("start-after").strip()
  3100. if searchFor != "":
  3101. let pos = inputString.find(searchFor)
  3102. if pos != -1: pos + searchFor.len
  3103. else: 0
  3104. else:
  3105. 0
  3106. let endPosition =
  3107. block:
  3108. let searchFor = n.getFieldValue("end-before").strip()
  3109. if searchFor != "":
  3110. let pos = inputString.find(searchFor, start = startPosition)
  3111. if pos != -1: pos - 1
  3112. else: 0
  3113. else:
  3114. inputString.len - 1
  3115. var q: RstParser
  3116. initParser(q, p.s)
  3117. let saveFileIdx = p.s.currFileIdx
  3118. setCurrFilename(p.s, path)
  3119. getTokens(
  3120. inputString[startPosition..endPosition],
  3121. q.tok)
  3122. # workaround a GCC bug; more like the interior pointer bug?
  3123. #if find(q.tok[high(q.tok)].symbol, "\0\x01\x02") > 0:
  3124. # InternalError("Too many binary zeros in include file")
  3125. result = parseDoc(q)
  3126. p.s.currFileIdx = saveFileIdx
proc dirCodeBlock(p: var RstParser, nimExtension = false): PRstNode =
  ## Parses a code block.
  ##
  ## Code blocks are rnDirective trees with a `kind` of rnCodeBlock. See the
  ## description of ``parseDirective`` for further structure information.
  ##
  ## Code blocks can come in two forms, the standard `code directive
  ## <http://docutils.sourceforge.net/docs/ref/rst/directives.html#code>`_ and
  ## the nim extension ``.. code-block::``. If the block is an extension, we
  ## want the default language syntax highlighting to be Nim, so we create a
  ## fake internal field to communicate with the generator. The field is named
  ## ``default-language``, which is unlikely to collide with a field specified
  ## by any random rst input file.
  ##
  ## As an extension this proc will process the ``file`` extension field and if
  ## present will replace the code block with the contents of the referenced
  ## file. This behaviour is disabled in sandboxed mode and can be re-enabled
  ## with the `roSandboxDisabled` flag.
  ##
  ## `nimExtension` should be true for the ``code-block`` form; only then
  ## is `defaultCodeLangNim` applied to the result.
  result = parseDirective(p, rnCodeBlock, {hasArg, hasOptions}, parseLiteralBlock)
  # handling of the ``file`` field is delegated to `mayLoadFile`
  mayLoadFile(p, result)
  # Extend the field block if we are using our custom Nim extension.
  if nimExtension:
    defaultCodeLangNim(p, result)
proc dirContainer(p: var RstParser): PRstNode =
  ## Parses a `container` directive into an `rnContainer` node: an argument
  ## and a body parsed by `parseSectionWrapper`, but no options.
  result = parseDirective(p, rnContainer, {hasArg}, parseSectionWrapper)
  # argument, options and content -- each of them may be nil
  assert(result.len == 3)
proc dirImage(p: var RstParser): PRstNode =
  ## Parses an `image` directive into an `rnImage` node: a file-style
  ## argument plus options. No content parser is passed, so the content
  ## child is nil.
  result = parseDirective(p, rnImage, {hasOptions, hasArg, argIsFile}, nil)
proc dirFigure(p: var RstParser): PRstNode =
  ## Parses a `figure` directive into an `rnFigure` node: a file-style
  ## argument, options and a body parsed by `parseSectionWrapper`.
  result = parseDirective(p, rnFigure, {hasOptions, hasArg, argIsFile},
                          parseSectionWrapper)
proc dirTitle(p: var RstParser): PRstNode =
  ## Parses a `title` directive into an `rnTitle` node; only the argument is
  ## parsed (no options, no content).
  result = parseDirective(p, rnTitle, {hasArg}, nil)
proc dirContents(p: var RstParser): PRstNode =
  ## Parses a `contents` directive into an `rnContents` node (argument only)
  ## and sets the `hasToc` flag of the shared state.
  result = parseDirective(p, rnContents, {hasArg}, nil)
  p.s.hasToc = true
proc dirIndex(p: var RstParser): PRstNode =
  ## Parses an `index` directive into an `rnIndex` node: no argument and no
  ## options, only a body parsed by `parseSectionWrapper`.
  result = parseDirective(p, rnIndex, {}, parseSectionWrapper)
proc dirAdmonition(p: var RstParser, d: string): PRstNode =
  ## Parses an admonition directive into an `rnAdmonition` node: no argument
  ## and no options, only a body parsed by `parseSectionWrapper`. The
  ## directive name `d` is stored in the node's `adType` field.
  result = parseDirective(p, rnAdmonition, {}, parseSectionWrapper)
  result.adType = d
  3168. proc dirDefaultRole(p: var RstParser): PRstNode =
  3169. result = parseDirective(p, rnDefaultRole, {hasArg}, nil)
  3170. if result.sons[0].len == 0: p.s.currRole = defaultRole(p.s.options)
  3171. else:
  3172. assert result.sons[0].sons[0].kind == rnLeaf
  3173. p.s.currRole = result.sons[0].sons[0].text
  3174. p.s.currRoleKind = whichRole(p, p.s.currRole)
  3175. proc dirRole(p: var RstParser): PRstNode =
  3176. result = parseDirective(p, rnDirective, {hasArg, hasOptions}, nil)
  3177. # just check that language is supported, TODO: real role association
  3178. let lang = getFieldValue(result, "language").strip
  3179. if lang != "" and getSourceLanguage(lang) == langNone:
  3180. rstMessage(p, mwUnsupportedLanguage, lang)
  3181. proc dirRawAux(p: var RstParser, result: var PRstNode, kind: RstNodeKind,
  3182. contentParser: SectionParser) =
  3183. var filename = getFieldValue(result, "file")
  3184. if filename.len > 0:
  3185. var path = p.findRelativeFile(filename)
  3186. if path.len == 0:
  3187. rstMessage(p, meCannotOpenFile, filename)
  3188. else:
  3189. var f = readFile(path)
  3190. result = newRstNode(kind)
  3191. result.add newLeaf(f)
  3192. else:
  3193. result = newRstNode(kind, result.sons)
  3194. result.add(parseDirBody(p, contentParser))
  3195. proc dirRaw(p: var RstParser): PRstNode =
  3196. #
  3197. #The following options are recognized:
  3198. #
  3199. #file : string (newlines removed)
  3200. # The local filesystem path of a raw data file to be included.
  3201. #
  3202. # html
  3203. # latex
  3204. result = parseDirective(p, rnDirective, {hasOptions, hasArg, argIsWord})
  3205. if result.sons[0] != nil:
  3206. if cmpIgnoreCase(result.sons[0].sons[0].text, "html") == 0:
  3207. dirRawAux(p, result, rnRawHtml, parseLiteralBlock)
  3208. elif cmpIgnoreCase(result.sons[0].sons[0].text, "latex") == 0:
  3209. dirRawAux(p, result, rnRawLatex, parseLiteralBlock)
  3210. else:
  3211. rstMessage(p, meInvalidDirective, result.sons[0].sons[0].text)
  3212. else:
  3213. dirRawAux(p, result, rnRaw, parseSectionWrapper)
  3214. proc dirImportdoc(p: var RstParser): PRstNode =
  3215. result = parseDirective(p, rnDirective, {}, parseLiteralBlock)
  3216. assert result.sons[2].kind == rnLiteralBlock
  3217. assert result.sons[2].sons[0].kind == rnLeaf
  3218. let filenames: seq[string] = split(result.sons[2].sons[0].text, seps = {','})
  3219. proc rmSpaces(s: string): string = s.split.join("")
  3220. for origFilename in filenames:
  3221. p.s.idxImports[origFilename.rmSpaces] = ImportdocInfo(fromInfo: lineInfo(p))
  3222. proc selectDir(p: var RstParser, d: string): PRstNode =
  3223. result = nil
  3224. let tok = p.tok[p.idx-2] # report on directive in ".. directive::"
  3225. if roSandboxDisabled notin p.s.options:
  3226. if d notin SandboxDirAllowlist:
  3227. rstMessage(p, meSandboxedDirective, d, tok.line, tok.col)
  3228. case d
  3229. of "admonition", "attention", "caution": result = dirAdmonition(p, d)
  3230. of "code": result = dirCodeBlock(p)
  3231. of "code-block": result = dirCodeBlock(p, nimExtension = true)
  3232. of "container": result = dirContainer(p)
  3233. of "contents": result = dirContents(p)
  3234. of "danger": result = dirAdmonition(p, d)
  3235. of "default-role": result = dirDefaultRole(p)
  3236. of "error": result = dirAdmonition(p, d)
  3237. of "figure": result = dirFigure(p)
  3238. of "hint": result = dirAdmonition(p, d)
  3239. of "image": result = dirImage(p)
  3240. of "important": result = dirAdmonition(p, d)
  3241. of "importdoc": result = dirImportdoc(p)
  3242. of "include": result = dirInclude(p)
  3243. of "index": result = dirIndex(p)
  3244. of "note": result = dirAdmonition(p, d)
  3245. of "raw":
  3246. if roSupportRawDirective in p.s.options:
  3247. result = dirRaw(p)
  3248. else:
  3249. rstMessage(p, meInvalidDirective, d)
  3250. of "role": result = dirRole(p)
  3251. of "tip": result = dirAdmonition(p, d)
  3252. of "title": result = dirTitle(p)
  3253. of "warning": result = dirAdmonition(p, d)
  3254. else:
  3255. rstMessage(p, meInvalidDirective, d, tok.line, tok.col)
proc parseDotDot(p: var RstParser): PRstNode =
  ## parse "explicit markup blocks"
  ##
  ## The alternatives are tried in this order: a directive, a hyperlink
  ## target (`_name:`), a substitution definition (`|name|`), a footnote or
  ## citation (`[`), and finally a comment. The result is nil for hyperlink
  ## targets and substitution definitions, which only update parser state.
  result = nil
  var n: PRstNode # to store result, workaround for bug 16855
  var col = currentTok(p).col
  inc p.idx
  var d = getDirective(p)
  if d != "":
    # the directive is parsed relative to the column of the markup start
    pushInd(p, col)
    result = selectDir(p, d)
    popInd(p)
  elif match(p, p.idx, " _"):
    # hyperlink target:
    inc p.idx, 2
    var ending = ":"
    # a name in backticks ends at the closing backtick instead of at ":"
    if currentTok(p).symbol == "`":
      inc p.idx
      ending = "`"
    var a = getReferenceName(p, ending)
    if ending == "`":
      # the ":" is still required after a backquoted name
      if currentTok(p).symbol == ":":
        inc p.idx
      else:
        rstMessage(p, meExpected, ":")
    if currentTok(p).kind == tkWhite: inc p.idx
    var b = untilEol(p)
    if len(b) == 0: # set internal anchor
      p.curAnchors.add ManualAnchor(
        alias: linkName(a), anchor: rstnodeToRefname(a), info: prevLineInfo(p)
      )
    else: # external hyperlink
      setRef(p, rstnodeToRefname(a), b, refType=hyperlinkAlias)
  elif match(p, p.idx, " |"):
    # substitution definitions:
    inc p.idx, 2
    var a = getReferenceName(p, "|")
    var b: PRstNode
    if currentTok(p).kind == tkWhite: inc p.idx
    # only `replace` and `image` are handled as substitution directives
    if cmpIgnoreStyle(currentTok(p).symbol, "replace") == 0:
      inc p.idx
      expect(p, "::")
      b = untilEol(p)
    elif cmpIgnoreStyle(currentTok(p).symbol, "image") == 0:
      inc p.idx
      b = dirImage(p)
    else:
      rstMessage(p, meInvalidDirective, currentTok(p).symbol)
    setSub(p, addNodes(a), b)
  elif match(p, p.idx, " [") and
      (n = parseFootnote(p); n != nil):
    # `n` is assigned inside the condition so that a failed footnote parse
    # falls through to the comment branch below
    result = n
  else:
    result = parseComment(p, col)
proc rstParsePass1*(fragment: string,
                    line, column: int,
                    sharedState: PRstSharedState): PRstNode =
  ## Parses an RST `fragment`.
  ## The result should be further processed by
  ## preparePass2_ and resolveSubs_ (which is pass 2).
  ##
  ## `line` and `column` are stored in the parser as the position of the
  ## fragment; `sharedState` is the state the new parser is initialised
  ## with.
  var p: RstParser
  initParser(p, sharedState)
  p.line = line
  p.col = column
  getTokens(fragment, p.tok)
  result = parseDoc(p)
  3321. proc extractLinkEnd(x: string): string =
  3322. ## From links like `path/to/file.html#/%` extract `file.html#/%`.
  3323. let i = find(x, '#')
  3324. let last =
  3325. if i >= 0: i
  3326. else: x.len - 1
  3327. let j = rfind(x, '/', start=0, last=last)
  3328. if j >= 0:
  3329. result = x[j+1 .. ^1]
  3330. else:
  3331. result = x
proc loadIdxFile(s: var PRstSharedState, origFilename: string) =
  ## Loads the `.idx` file that belongs to the imported document
  ## `origFilename` and registers all of its entries as external anchors in
  ## `s`, so that links to the imported document can be resolved.
  ##
  ## Requires `roSandboxDisabled`. Problems (unknown extension, unreadable
  ## or malformed index file, markup and Nim entries mixed in one file) are
  ## reported through the message handler.
  doAssert roSandboxDisabled in s.options
  var info: TLineInfo
  info.fileIndex = addFilename(s, origFilename)
  var (dir, basename, ext) = origFilename.splitFile
  if ext notin [".md", ".rst", ".nim", ""]:
    rstMessage(s.filenames, s.msgHandler, s.idxImports[origFilename].fromInfo,
               meCannotOpenFile, origFilename & ": unknown extension")
  # the index file has the same directory and base name, extension ".idx"
  let idxFilename = dir / basename & ".idx"
  let (idxPath, linkRelPath) = s.findRefFile(idxFilename)
  s.idxImports[origFilename].linkRelPath = linkRelPath
  var
    fileEntries: seq[IndexEntry]
    title: IndexEntry
  try:
    (fileEntries, title) = parseIdxFile(idxPath)
  except IOError:
    rstMessage(s.filenames, s.msgHandler, s.idxImports[origFilename].fromInfo,
               meCannotOpenFile, idxPath)
  except ValueError as e:
    s.msgHandler(idxPath, LineRstInit, ColRstInit, meInvalidField, e.msg)

  var isMarkup = false # for sanity check to avoid mixing .md <-> .nim
  for entry in fileEntries:
    # Though target .idx already has inside it the path to HTML relative
    # project's root, we won't rely on it and use `linkRelPath` instead.
    let refn = extractLinkEnd(entry.link)
    # select either markup (rst/md) or Nim cases:
    if entry.kind in {ieMarkupTitle, ieNimTitle}:
      s.idxImports[origFilename].title = entry.keyword
    case entry.kind
    of ieIdxRole, ieHeading, ieMarkupTitle:
      # entries coming from markup documents
      if ext == ".nim" and entry.kind == ieMarkupTitle:
        rstMessage(s, idxPath, meInvalidField,
                   $ieMarkupTitle & " in supposedly .nim-derived file")
      if entry.kind == ieMarkupTitle:
        isMarkup = true
      info.line = entry.line.uint16
      addAnchorExtRst(s, key = entry.keyword, refn = refn,
                      anchorType = headlineAnchor, info=info)
    of ieNim, ieNimGroup, ieNimTitle:
      # entries coming from Nim modules
      if ext in [".md", ".rst"] or isMarkup:
        rstMessage(s, idxPath, meInvalidField,
                   $entry.kind & " in supposedly markup-derived file")
      s.nimFileImported = true
      var langSym: LangSymbol
      if entry.kind in {ieNim, ieNimTitle}:
        # tokenize the link title and turn the tokens into leaves so that
        # `toLangSymbol` can be applied to them
        var q: RstParser
        initParser(q, s)
        info.line = entry.line.uint16
        setLen(q.tok, 0)
        q.idx = 0
        getTokens(entry.linkTitle, q.tok)
        var sons = newSeq[PRstNode](q.tok.len)
        for i in 0 ..< q.tok.len: sons[i] = newLeaf(q.tok[i].symbol)
        let linkTitle = newRstNode(rnInner, sons)
        langSym = linkTitle.toLangSymbol
      else: # entry.kind == ieNimGroup
        langSym = langSymbolGroup(kind=entry.linkTitle, name=entry.keyword)
      addAnchorNim(s, external = true, refn = refn, tooltip = entry.linkDesc,
                   langSym = langSym, priority = -4, # lowest
                   info = info, module = info.fileIndex)
  # every usable index file must have provided a title entry
  doAssert s.idxImports[origFilename].title != ""
proc preparePass2*(s: var PRstSharedState, mainNode: PRstNode, importdoc = true) =
  ## Records titles in node `mainNode` and orders footnotes.
  ##
  ## If `importdoc` is true, the `.idx` file of every document registered in
  ## `s.idxImports` is loaded as well.
  countTitles(s, mainNode)
  fixHeadlines(s)
  orderFootnotes(s)
  if importdoc:
    for origFilename in s.idxImports.keys:
      loadIdxFile(s, origFilename)
proc resolveLink(s: PRstSharedState, n: PRstNode) : PRstNode =
  ## Associate this link alias with its target and change node kind to
  ## rnHyperlink or rnInternalRef appropriately.
  ##
  ## All candidates (explicit references, RST anchors and, where enabled,
  ## anchors generated from Nim symbols) are collected and sorted; the first
  ## one after sorting is used. More than one candidate is reported as
  ## `mwAmbiguousLink`; no candidate is reported as `mwBrokenLink` and `n`
  ## is returned unchanged.
  var desc, alias: PRstNode
  if n.kind == rnPandocRef: # link like [desc][alias]
    desc = n.sons[0]
    alias = n.sons[1]
  else: # n.kind == rnRstRef, link like `desc=alias`_
    desc = n
    alias = n
  type LinkDef = object
    ar: AnchorRule
    priority: int
    tooltip: string
    target: PRstNode
    info: TLineInfo
    externFilename: string
      # when external anchor: origin filename where anchor was defined
    isTitle: bool
  proc cmp(x, y: LinkDef): int =
    # order by priority; equal priorities are ordered by target
    result = cmp(x.priority, y.priority)
    if result == 0:
      result = cmp(x.target, y.target)
  var foundLinks: seq[LinkDef]
  let refn = rstnodeToRefname(alias)
  # 1) references found by `findRef`
  var hyperlinks = findRef(s, refn)
  for y in hyperlinks:
    foundLinks.add LinkDef(ar: arHyperlink, priority: refPriority(y.kind),
                           target: y.value, info: y.info,
                           tooltip: "(" & $y.kind & ")")
  # 2) RST anchors, internal or imported from other documents
  let substRst = findMainAnchorRst(s, alias.addNodes, n.info)
  template getExternFilename(subst: AnchorSubst): string =
    # empty string for anchors that belong to the current document
    if subst.kind == arExternalRst or
        (subst.kind == arNim and subst.external):
      getFilename(s, subst)
    else: ""
  for subst in substRst:
    var refname, fullRefname: string
    if subst.kind == arInternalRst:
      refname = subst.target.anchor
      fullRefname = refname
    else: # arExternalRst
      refname = subst.refnameExt
      fullRefname = s.idxImports[getFilename(s, subst)].linkRelPath &
                      "/" & refname
    let anchorType =
      if subst.kind == arInternalRst: subst.anchorType
      else: subst.anchorTypeExt # arExternalRst
    foundLinks.add LinkDef(ar: subst.kind, priority: subst.priority,
                           target: newLeaf(fullRefname),
                           info: subst.info,
                           externFilename: getExternFilename(subst),
                           isTitle: isDocumentationTitle(refname),
                           tooltip: "(" & $anchorType & ")")
  # find anchors automatically generated from Nim symbols
  if roNimFile in s.options or s.nimFileImported:
    let substNim = findMainAnchorNim(s, signature=alias, n.info)
    for subst in substNim:
      let fullRefname =
        if subst.external:
          s.idxImports[getFilename(s, subst)].linkRelPath &
              "/" & subst.refname
        else: subst.refname
      foundLinks.add LinkDef(ar: subst.kind, priority: subst.priority,
                             target: newLeaf(fullRefname),
                             externFilename: getExternFilename(subst),
                             isTitle: isDocumentationTitle(subst.refname),
                             info: subst.info, tooltip: subst.tooltip)
  # highest priority first
  foundLinks.sort(cmp = cmp, order = Descending)
  let aliasStr = addNodes(alias)
  if foundLinks.len >= 1:
    if foundLinks[0].externFilename != "":
      # remember that this imported document is actually referenced
      s.idxImports[foundLinks[0].externFilename].used = true
    let kind = if foundLinks[0].ar in {arHyperlink, arExternalRst}: rnHyperlink
               elif foundLinks[0].ar == arNim:
                 if foundLinks[0].externFilename == "": rnNimdocRef
                 else: rnHyperlink
               else: rnInternalRef
    result = newRstNode(kind)
    let documentName = # filename without ext for `.nim`, title for `.md`
      if foundLinks[0].ar == arNim:
        changeFileExt(foundLinks[0].externFilename.extractFilename, "")
      elif foundLinks[0].externFilename != "":
        s.idxImports[foundLinks[0].externFilename].title
      else: foundLinks[0].externFilename.extractFilename
    # links into other documents are prefixed with the document name,
    # except when the target is that document's title
    let linkText =
      if foundLinks[0].externFilename != "":
        if foundLinks[0].isTitle: newLeaf(addNodes(desc))
        else: newLeaf(documentName & ": " & addNodes(desc))
      else:
        newRstNode(rnInner, desc.sons)
    result.sons = @[linkText, foundLinks[0].target]
    if kind == rnNimdocRef: result.tooltip = foundLinks[0].tooltip
    if foundLinks.len > 1: # report ambiguous link
      var targets = newSeq[string]()
      for l in foundLinks:
        # NOTE(review): runs of spaces may have been collapsed in the copy
        # under review; confirm the width of this indentation literal and of
        # the one before "clash:" below against the pristine file.
        var t = " "
        if s.filenames.len > 1:
          t.add getFilename(s.filenames, l.info.fileIndex)
        let n = l.info.line
        let c = l.info.col + ColRstOffset
        t.add "($1, $2): $3" % [$n, $c, l.tooltip]
        targets.add t
      rstMessage(s.filenames, s.msgHandler, n.info, mwAmbiguousLink,
                 "`$1`\n clash:\n$2" % [
                   aliasStr, targets.join("\n")])
  else: # nothing found
    result = n
    rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, aliasStr)
  3511. proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode =
  3512. ## Makes pass 2 of RST parsing.
  3513. ## Resolves substitutions and anchor aliases, groups footnotes.
  3514. ## Takes input node `n` and returns the same node with recursive
  3515. ## substitutions in `n.sons` to `result`.
  3516. result = n
  3517. if n == nil: return
  3518. case n.kind
  3519. of rnSubstitutionReferences:
  3520. var x = findSub(s, n)
  3521. if x >= 0:
  3522. result = s.subs[x].value
  3523. else:
  3524. var key = addNodes(n)
  3525. var e = getEnv(key)
  3526. if e != "": result = newLeaf(e)
  3527. else: rstMessage(s.filenames, s.msgHandler, n.info,
  3528. mwUnknownSubstitution, key)
  3529. of rnRstRef, rnPandocRef:
  3530. result = resolveLink(s, n)
  3531. of rnFootnote:
  3532. var (fnType, num) = getFootnoteType(s, n.sons[0])
  3533. case fnType
  3534. of fnManualNumber, fnCitation:
  3535. discard "no need to alter fixed text"
  3536. of fnAutoNumberLabel, fnAutoNumber:
  3537. if fnType == fnAutoNumberLabel:
  3538. let labelR = rstnodeToRefname(n.sons[0])
  3539. num = getFootnoteNum(s, labelR)
  3540. else:
  3541. num = getFootnoteNum(s, n.order)
  3542. var nn = newRstNode(rnInner)
  3543. nn.add newLeaf($num)
  3544. result.sons[0] = nn
  3545. of fnAutoSymbol:
  3546. let sym = getAutoSymbol(s, n.order)
  3547. n.sons[0].sons[0].text = sym
  3548. n.sons[1] = resolveSubs(s, n.sons[1])
  3549. of rnFootnoteRef:
  3550. var (fnType, num) = getFootnoteType(s, n.sons[0])
  3551. template addLabel(number: int | string) =
  3552. var nn = newRstNode(rnInner)
  3553. nn.add newLeaf($number)
  3554. result.add(nn)
  3555. var refn = fnType.prefix
  3556. # create new rnFootnoteRef, add final label, and finalize target refn:
  3557. result = newRstNode(rnFootnoteRef, info = n.info)
  3558. case fnType
  3559. of fnManualNumber:
  3560. addLabel num
  3561. refn.add $num
  3562. of fnAutoNumber:
  3563. inc s.currFootnoteNumRef
  3564. addLabel getFootnoteNum(s, s.currFootnoteNumRef)
  3565. refn.add $s.currFootnoteNumRef
  3566. of fnAutoNumberLabel:
  3567. addLabel getFootnoteNum(s, rstnodeToRefname(n))
  3568. refn.add rstnodeToRefname(n)
  3569. of fnAutoSymbol:
  3570. inc s.currFootnoteSymRef
  3571. addLabel getAutoSymbol(s, s.currFootnoteSymRef)
  3572. refn.add $s.currFootnoteSymRef
  3573. of fnCitation:
  3574. result.add n.sons[0]
  3575. refn.add rstnodeToRefname(n)
  3576. # TODO: correctly report ambiguities
  3577. let anchorInfo = findMainAnchorRst(s, refn, n.info)
  3578. if anchorInfo.len != 0:
  3579. result.add newLeaf(anchorInfo[0].target.anchor) # add link
  3580. else:
  3581. rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, refn)
  3582. result.add newLeaf(refn) # add link
  3583. of rnLeaf:
  3584. discard
  3585. else:
  3586. var regroup = false
  3587. for i in 0 ..< n.len:
  3588. n.sons[i] = resolveSubs(s, n.sons[i])
  3589. if n.sons[i] != nil and n.sons[i].kind == rnFootnote:
  3590. regroup = true
  3591. if regroup: # group footnotes together into rnFootnoteGroup
  3592. var newSons: seq[PRstNode]
  3593. var i = 0
  3594. while i < n.len:
  3595. if n.sons[i] != nil and n.sons[i].kind == rnFootnote:
  3596. var grp = newRstNode(rnFootnoteGroup)
  3597. while i < n.len and n.sons[i].kind == rnFootnote:
  3598. grp.sons.add n.sons[i]
  3599. inc i
  3600. newSons.add grp
  3601. else:
  3602. newSons.add n.sons[i]
  3603. inc i
  3604. result.sons = newSons
  3605. proc completePass2*(s: PRstSharedState) =
  3606. for (filename, importdocInfo) in s.idxImports.pairs:
  3607. if not importdocInfo.used:
  3608. rstMessage(s.filenames, s.msgHandler, importdocInfo.fromInfo,
  3609. mwUnusedImportdoc, filename)
  3610. proc rstParse*(text, filename: string,
  3611. line, column: int,
  3612. options: RstParseOptions,
  3613. findFile: FindFileHandler = nil,
  3614. findRefFile: FindRefFileHandler = nil,
  3615. msgHandler: MsgHandler = nil):
  3616. tuple[node: PRstNode, filenames: RstFileTable, hasToc: bool] =
  3617. ## Parses the whole `text`. The result is ready for `rstgen.renderRstToOut`,
  3618. ## note that 2nd tuple element should be fed to `initRstGenerator`
  3619. ## argument `filenames` (it is being filled here at least with `filename`
  3620. ## and possibly with other files from RST ``.. include::`` statement).
  3621. var sharedState = newRstSharedState(options, filename, findFile, findRefFile,
  3622. msgHandler, hasToc=false)
  3623. let unresolved = rstParsePass1(text, line, column, sharedState)
  3624. preparePass2(sharedState, unresolved)
  3625. result.node = resolveSubs(sharedState, unresolved)
  3626. completePass2(sharedState)
  3627. result.filenames = sharedState.filenames
  3628. result.hasToc = sharedState.hasToc