rstast.nim 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module implements an AST for the `reStructuredText`:idx: parser.
  10. import std/[strutils, json]
  11. when defined(nimPreviewSlimSystem):
  12. import std/assertions
type
  RstNodeKind* = enum        ## the possible node kinds of a PRstNode
    rnInner,                  # an inner node or a root
    rnHeadline,               # a headline
    rnOverline,               # an over- and underlined headline
    rnMarkdownHeadline,       # a Markdown headline
    rnTransition,             # a transition (the ------------- <hr> thingie)
    rnParagraph,              # a paragraph
    rnBulletList,             # a bullet list
    rnBulletItem,             # a bullet item
    rnEnumList,               # an enumerated list
    rnEnumItem,               # an enumerated item
    rnDefList, rnMdDefList,   # a definition list (RST/Markdown)
    rnDefItem,                # an item of a definition list consisting of ...
    rnDefName,                # ... a name part ...
    rnDefBody,                # ... and a body part ...
    rnFieldList,              # a field list
    rnField,                  # a field item
    rnFieldName,              # consisting of a field name ...
    rnFieldBody,              # ... and a field body
    rnOptionList, rnOptionListItem, rnOptionGroup, rnOption, rnOptionString,
    rnOptionArgument, rnDescription, rnLiteralBlock,
    rnMarkdownBlockQuote,     # a quote starting from punctuation like >>>
    rnMarkdownBlockQuoteItem, # a quotation block, quote lines starting with
                              # the same number of chars
    rnLineBlock,              # the | thingie
    rnLineBlockItem,          # a son of rnLineBlock - one line inside it.
                              # When `RstNode` lineIndent="\n" the line is empty
    rnBlockQuote,             # text just indented
    rnTable, rnGridTable, rnMarkdownTable, rnTableRow, rnTableHeaderCell, rnTableDataCell,
    rnFootnote,               # a footnote
    rnCitation,               # similar to footnote, so use rnFootnote instead
    rnFootnoteGroup,          # footnote group - exists for a purely stylistic
                              # reason: to display a few footnotes as 1 block
    rnStandaloneHyperlink, rnHyperlink,
    rnRstRef,                 # RST reference like `section name`_
    rnPandocRef,              # Pandoc Markdown reference like [section name]
    rnInternalRef, rnFootnoteRef,
    rnNimdocRef,              # reference to automatically generated Nim symbol
    rnDirective,              # a general directive
    rnDirArg,                 # a directive argument (for some directives).
                              # here are directives that are not rnDirective:
    rnRaw, rnTitle, rnContents, rnImage, rnFigure, rnCodeBlock, rnAdmonition,
    rnRawHtml, rnRawLatex,
    rnContainer,              # ``container`` directive
    rnIndex,                  # index directive:
                              # .. index::
                              #   key
                              #     * `file#id <file#id>`_
                              #     * `file#id <file#id>`_
    rnSubstitutionDef,        # a definition of a substitution
                              # Inline markup:
    rnInlineCode,             # interpreted text with code in a known language
    rnCodeFragment,           # inline code for highlighting with the specified
                              # class (which cannot be inferred from context)
    rnUnknownRole,            # interpreted text with an unknown role
    rnSub, rnSup, rnIdx,
    rnEmphasis,               # "*"
    rnStrongEmphasis,         # "**"
    rnTripleEmphasis,         # "***"
    rnInterpretedText,        # "`" an auxiliary role for parsing that will
                              # be converted into other kinds like rnInlineCode
    rnInlineLiteral,          # "``"
    rnInlineTarget,           # "_`target`"
    rnSubstitutionReferences, # "|"
    rnSmiley,                 # some smiley
    rnDefaultRole,            # .. default-role:: code
    rnLeaf                    # a leaf; the node's text field contains the
                              # leaf val

  FileIndex* = distinct int32 ## distinct integer stored in `TLineInfo.fileIndex`
  TLineInfo* = object         ## line/column/file position kept in `RstNode.info`
    line*: uint16
    col*: int16
    fileIndex*: FileIndex

  PRstNode* = ref RstNode    ## an RST node
  RstNodeSeq* = seq[PRstNode]
  RstNode* {.acyclic, final.} = object ## AST node (result of RST parsing)
    # Variant part: which extra field exists depends on `kind`; kinds not
    # listed below carry only the common `anchor` and `sons` fields.
    case kind*: RstNodeKind ## the node's kind
    of rnLeaf, rnSmiley:
      text*: string          ## string that is expected to be displayed
    of rnEnumList:
      labelFmt*: string      ## label format like "(1)"
    of rnLineBlockItem:
      lineIndent*: string    ## a few spaces or newline at the line beginning
    of rnAdmonition:
      adType*: string        ## admonition type: "note", "caution", etc. This
                             ## text will set the style and also be displayed
    of rnOverline, rnHeadline, rnMarkdownHeadline:
      level*: int            ## level of headings starting from 1 (main
                             ## chapter) to larger ones (minor sub-sections)
                             ## level=0 means it's document title or subtitle
    of rnFootnote, rnCitation, rnOptionListItem:
      order*: int            ## footnote order (for auto-symbol footnotes and
                             ## auto-numbered ones without a label)
    of rnMarkdownBlockQuoteItem:
      quotationDepth*: int   ## number of characters in line prefix
    of rnRstRef, rnPandocRef, rnSubstitutionReferences,
        rnInterpretedText, rnField, rnInlineCode, rnCodeBlock, rnFootnoteRef:
      info*: TLineInfo       ## To have line/column info for warnings at
                             ## nodes that are post-processed after parsing
    of rnNimdocRef:
      tooltip*: string
    of rnTable, rnGridTable, rnMarkdownTable:
      colCount*: int         ## Number of (not-united) cells in the table
    of rnTableRow:
      endsHeader*: bool      ## Is last row in the header of table?
    of rnTableHeaderCell, rnTableDataCell:
      span*: int             ## Number of table columns that the cell occupies
    else:
      discard
    # Common part: present for every kind.
    anchor*: string          ## anchor, internal link target
                             ## (aka HTML id tag, aka Latex label/hypertarget)
    sons*: RstNodeSeq        ## the node's sons
# Equality for `FileIndex`, borrowed from the comparison of its underlying
# `int32` base type (distinct types do not get operators automatically).
proc `==`*(a, b: FileIndex): bool {.borrow.}
  127. proc len*(n: PRstNode): int =
  128. result = len(n.sons)
  129. proc newRstNode*(kind: RstNodeKind, sons: seq[PRstNode] = @[],
  130. anchor = ""): PRstNode =
  131. result = PRstNode(kind: kind, sons: sons, anchor: anchor)
  132. proc newRstNode*(kind: RstNodeKind, info: TLineInfo,
  133. sons: seq[PRstNode] = @[]): PRstNode =
  134. result = PRstNode(kind: kind, sons: sons)
  135. result.info = info
  136. proc newRstNode*(kind: RstNodeKind, s: string): PRstNode {.deprecated.} =
  137. assert kind in {rnLeaf, rnSmiley}
  138. result = newRstNode(kind)
  139. result.text = s
  140. proc newRstLeaf*(s: string): PRstNode =
  141. result = newRstNode(rnLeaf)
  142. result.text = s
  143. proc lastSon*(n: PRstNode): PRstNode =
  144. result = n.sons[len(n.sons)-1]
  145. proc add*(father, son: PRstNode) =
  146. add(father.sons, son)
  147. proc add*(father: PRstNode; s: string) =
  148. add(father.sons, newRstLeaf(s))
  149. proc addIfNotNil*(father, son: PRstNode) =
  150. if son != nil: add(father, son)
type
  RenderContext {.pure.} = object
    ## Mutable state passed through the RST-to-RST renderer.
    indent: int   ## current indentation in spaces; the renderer raises and
                  ## restores it around nested constructs
    verbatim: int ## counter that is non-zero while rendering inside an inline
                  ## literal; a lone backslash leaf is only escaped when it is 0
# Forward declaration: `renderRstSons` calls this proc before its body is
# defined further down in the file.
proc renderRstToRst(d: var RenderContext, n: PRstNode,
                    result: var string) {.gcsafe.}
  157. proc renderRstSons(d: var RenderContext, n: PRstNode, result: var string) =
  158. for i in countup(0, len(n) - 1):
  159. renderRstToRst(d, n.sons[i], result)
  160. proc renderRstToRst(d: var RenderContext, n: PRstNode, result: var string) =
  161. # this is needed for the index generation; it may also be useful for
  162. # debugging, but most code is already debugged...
  163. const
  164. lvlToChar: array[0..8, char] = ['!', '=', '-', '~', '`', '<', '*', '|', '+']
  165. if n == nil: return
  166. var ind = spaces(d.indent)
  167. case n.kind
  168. of rnInner:
  169. renderRstSons(d, n, result)
  170. of rnHeadline:
  171. result.add("\n")
  172. result.add(ind)
  173. let oldLen = result.len
  174. renderRstSons(d, n, result)
  175. let headlineLen = result.len - oldLen
  176. result.add("\n")
  177. result.add(ind)
  178. result.add repeat(lvlToChar[n.level], headlineLen)
  179. of rnOverline:
  180. result.add("\n")
  181. result.add(ind)
  182. var headline = ""
  183. renderRstSons(d, n, headline)
  184. let lvl = repeat(lvlToChar[n.level], headline.len - d.indent)
  185. result.add(lvl)
  186. result.add("\n")
  187. result.add(headline)
  188. result.add("\n")
  189. result.add(ind)
  190. result.add(lvl)
  191. of rnTransition:
  192. result.add("\n\n")
  193. result.add(ind)
  194. result.add repeat('-', 78-d.indent)
  195. result.add("\n\n")
  196. of rnParagraph:
  197. result.add("\n\n")
  198. result.add(ind)
  199. renderRstSons(d, n, result)
  200. of rnBulletItem:
  201. inc(d.indent, 2)
  202. var tmp = ""
  203. renderRstSons(d, n, tmp)
  204. if tmp.len > 0:
  205. result.add("\n")
  206. result.add(ind)
  207. result.add("* ")
  208. result.add(tmp)
  209. dec(d.indent, 2)
  210. of rnEnumItem:
  211. inc(d.indent, 4)
  212. var tmp = ""
  213. renderRstSons(d, n, tmp)
  214. if tmp.len > 0:
  215. result.add("\n")
  216. result.add(ind)
  217. result.add("(#) ")
  218. result.add(tmp)
  219. dec(d.indent, 4)
  220. of rnOptionList, rnFieldList, rnDefList, rnDefItem, rnLineBlock, rnFieldName,
  221. rnFieldBody, rnStandaloneHyperlink, rnBulletList, rnEnumList:
  222. renderRstSons(d, n, result)
  223. of rnDefName:
  224. result.add("\n\n")
  225. result.add(ind)
  226. renderRstSons(d, n, result)
  227. of rnDefBody:
  228. inc(d.indent, 2)
  229. if n.sons[0].kind != rnBulletList:
  230. result.add("\n")
  231. result.add(ind)
  232. result.add(" ")
  233. renderRstSons(d, n, result)
  234. dec(d.indent, 2)
  235. of rnField:
  236. var tmp = ""
  237. renderRstToRst(d, n.sons[0], tmp)
  238. var L = max(tmp.len + 3, 30)
  239. inc(d.indent, L)
  240. result.add "\n"
  241. result.add ind
  242. result.add ':'
  243. result.add tmp
  244. result.add ':'
  245. result.add spaces(L - tmp.len - 2)
  246. renderRstToRst(d, n.sons[1], result)
  247. dec(d.indent, L)
  248. of rnLineBlockItem:
  249. result.add("\n")
  250. result.add(ind)
  251. result.add("| ")
  252. renderRstSons(d, n, result)
  253. of rnBlockQuote:
  254. inc(d.indent, 2)
  255. renderRstSons(d, n, result)
  256. dec(d.indent, 2)
  257. of rnRstRef:
  258. result.add("`")
  259. renderRstSons(d, n, result)
  260. result.add("`_")
  261. of rnHyperlink:
  262. result.add('`')
  263. renderRstToRst(d, n.sons[0], result)
  264. result.add(" <")
  265. renderRstToRst(d, n.sons[1], result)
  266. result.add(">`_")
  267. of rnUnknownRole:
  268. result.add('`')
  269. renderRstToRst(d, n.sons[0],result)
  270. result.add("`:")
  271. renderRstToRst(d, n.sons[1],result)
  272. result.add(':')
  273. of rnSub:
  274. result.add('`')
  275. renderRstSons(d, n, result)
  276. result.add("`:sub:")
  277. of rnSup:
  278. result.add('`')
  279. renderRstSons(d, n, result)
  280. result.add("`:sup:")
  281. of rnIdx:
  282. result.add('`')
  283. renderRstSons(d, n, result)
  284. result.add("`:idx:")
  285. of rnEmphasis:
  286. result.add("*")
  287. renderRstSons(d, n, result)
  288. result.add("*")
  289. of rnStrongEmphasis:
  290. result.add("**")
  291. renderRstSons(d, n, result)
  292. result.add("**")
  293. of rnTripleEmphasis:
  294. result.add("***")
  295. renderRstSons(d, n, result)
  296. result.add("***")
  297. of rnInterpretedText:
  298. result.add('`')
  299. renderRstSons(d, n, result)
  300. result.add('`')
  301. of rnInlineLiteral:
  302. inc(d.verbatim)
  303. result.add("``")
  304. renderRstSons(d, n, result)
  305. result.add("``")
  306. dec(d.verbatim)
  307. of rnSmiley:
  308. result.add(n.text)
  309. of rnLeaf:
  310. if d.verbatim == 0 and n.text == "\\":
  311. result.add("\\\\") # XXX: escape more special characters!
  312. else:
  313. result.add(n.text)
  314. of rnIndex:
  315. result.add("\n\n")
  316. result.add(ind)
  317. result.add(".. index::\n")
  318. inc(d.indent, 3)
  319. if n.sons[2] != nil: renderRstSons(d, n.sons[2], result)
  320. dec(d.indent, 3)
  321. of rnContents:
  322. result.add("\n\n")
  323. result.add(ind)
  324. result.add(".. contents::")
  325. else:
  326. result.add("Error: cannot render: " & $n.kind)
  327. proc renderRstToRst*(n: PRstNode, result: var string) =
  328. ## renders `n` into its string representation and appends to `result`.
  329. var d: RenderContext
  330. renderRstToRst(d, n, result)
  331. proc renderRstToJsonNode(node: PRstNode): JsonNode =
  332. result =
  333. %[
  334. (key: "kind", val: %($node.kind)),
  335. (key: "level", val: %BiggestInt(node.level))
  336. ]
  337. if node.kind in {rnLeaf, rnSmiley} and node.text.len > 0:
  338. result.add("text", %node.text)
  339. if len(node.sons) > 0:
  340. var accm = newSeq[JsonNode](len(node.sons))
  341. for i, son in node.sons:
  342. accm[i] = renderRstToJsonNode(son)
  343. result.add("sons", %accm)
  344. proc renderRstToJson*(node: PRstNode): string =
  345. ## Writes the given RST node as JSON that is in the form
  346. ##
  347. ## {
  348. ## "kind":string node.kind,
  349. ## "text":optional string node.text,
  350. ## "level":optional int node.level,
  351. ## "sons":optional node array
  352. ## }
  353. renderRstToJsonNode(node).pretty
  354. proc renderRstToText*(node: PRstNode): string =
  355. ## minimal text representation of markup node
  356. const code = {rnCodeFragment, rnInterpretedText, rnInlineLiteral, rnInlineCode}
  357. if node == nil:
  358. return ""
  359. case node.kind
  360. of rnLeaf, rnSmiley:
  361. result.add node.text
  362. else:
  363. if node.kind in code: result.add "`"
  364. for i in 0 ..< node.sons.len:
  365. if node.kind in {rnInlineCode, rnCodeBlock} and i == 0:
  366. continue # omit language specifier
  367. result.add renderRstToText(node.sons[i])
  368. if node.kind in code: result.add "`"
  369. proc treeRepr*(node: PRstNode, indent=0): string =
  370. ## Writes the parsed RST `node` into an AST tree with compact string
  371. ## representation in the format (one line per every sub-node):
  372. ## ``indent - kind - [text|level|order|adType] - anchor (if non-zero)``
  373. ## (suitable for debugging of RST parsing).
  374. if node == nil:
  375. result.add " ".repeat(indent) & "[nil]\n"
  376. return
  377. result.add " ".repeat(indent) & $node.kind
  378. case node.kind
  379. of rnLeaf, rnSmiley:
  380. result.add (if node.text == "": "" else: " '" & node.text & "'")
  381. of rnEnumList:
  382. result.add " labelFmt=" & node.labelFmt
  383. of rnLineBlockItem:
  384. var txt: string
  385. if node.lineIndent == "\n": txt = " (blank line)"
  386. else: txt = " lineIndent=" & $node.lineIndent.len
  387. result.add txt
  388. of rnAdmonition:
  389. result.add " adType=" & node.adType
  390. of rnHeadline, rnOverline, rnMarkdownHeadline:
  391. result.add " level=" & $node.level
  392. of rnFootnote, rnCitation, rnOptionListItem:
  393. result.add (if node.order == 0: "" else: " order=" & $node.order)
  394. of rnMarkdownBlockQuoteItem:
  395. result.add " quotationDepth=" & $node.quotationDepth
  396. of rnTable, rnGridTable, rnMarkdownTable:
  397. result.add " colCount=" & $node.colCount
  398. of rnTableHeaderCell, rnTableDataCell:
  399. if node.span > 0:
  400. result.add " span=" & $node.span
  401. of rnTableRow:
  402. if node.endsHeader: result.add " endsHeader"
  403. else:
  404. discard
  405. result.add (if node.anchor == "": "" else: " anchor='" & node.anchor & "'")
  406. result.add "\n"
  407. for son in node.sons:
  408. result.add treeRepr(son, indent=indent+2)