rstidx.nim 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. #
  2. # Nim's Runtime Library
  3. # (c) Copyright 2022 Andreas Rumpf
  4. #
  5. # See the file "copying.txt", included in this
  6. # distribution, for details about the copyright.
  7. ## Nim `idx`:idx: file format related definitions.
  8. import std/[strutils, syncio, hashes]
  9. from std/os import splitFile
  10. type
  11. IndexEntryKind* = enum ## discriminator tag
  12. ieMarkupTitle = "markupTitle"
  13. ## RST/Markdown title, text in `keyword` +
  14. ## HTML text in `linkTitle`
  15. ieNimTitle = "nimTitle"
  16. ## Nim title
  17. ieHeading = "heading" ## RST/Markdown markup heading, escaped
  18. ieIdxRole = "idx" ## RST :idx: definition, escaped
  19. ieNim = "nim" ## Nim symbol, unescaped
  20. ieNimGroup = "nimgrp" ## Nim overload group, unescaped
  21. IndexEntry* = object
  22. kind*: IndexEntryKind ## 0.
  23. keyword*: string ## 1.
  24. link*: string ## 2.
  25. linkTitle*: string ## 3. contains a prettier text for the href
  26. linkDesc*: string ## 4. the title attribute of the final href
  27. line*: int ## 5.
  28. module*: string ## origin file, NOT a field in ``.idx`` file
  29. aux*: string ## auxuliary field, NOT a field in ``.idx`` file
  30. proc isDocumentationTitle*(hyperlink: string): bool =
  31. ## Returns true if the hyperlink is actually a documentation title.
  32. ##
  33. ## Documentation titles lack the hash. See `mergeIndexes()
  34. ## <#mergeIndexes,string>`_ for a more detailed explanation.
  35. result = hyperlink.find('#') < 0
  36. proc `$`*(e: IndexEntry): string =
  37. """("$1", "$2", "$3", "$4", $5)""" % [
  38. e.keyword, e.link, e.linkTitle, e.linkDesc, $e.line]
  39. proc quoteIndexColumn(text: string): string =
  40. ## Returns a safe version of `text` for serialization to the ``.idx`` file.
  41. ##
  42. ## The returned version can be put without worries in a line based tab
  43. ## separated column text file. The following character sequence replacements
  44. ## will be performed for that goal:
  45. ##
  46. ## * ``"\\"`` => ``"\\\\"``
  47. ## * ``"\n"`` => ``"\\n"``
  48. ## * ``"\t"`` => ``"\\t"``
  49. result = newStringOfCap(text.len + 3)
  50. for c in text:
  51. case c
  52. of '\\': result.add "\\"
  53. of '\L': result.add "\\n"
  54. of '\C': discard
  55. of '\t': result.add "\\t"
  56. else: result.add c
  57. proc unquoteIndexColumn*(text: string): string =
  58. ## Returns the unquoted version generated by ``quoteIndexColumn``.
  59. result = text.multiReplace(("\\t", "\t"), ("\\n", "\n"), ("\\\\", "\\"))
  60. proc formatIndexEntry*(kind: IndexEntryKind; htmlFile, id, term, linkTitle,
  61. linkDesc: string, line: int):
  62. tuple[entry: string, isTitle: bool] =
  63. result.entry = $kind
  64. result.entry.add('\t')
  65. result.entry.add term
  66. result.entry.add('\t')
  67. result.entry.add(htmlFile)
  68. if id.len > 0:
  69. result.entry.add('#')
  70. result.entry.add(id)
  71. result.isTitle = false
  72. else:
  73. result.isTitle = true
  74. result.entry.add('\t' & linkTitle.quoteIndexColumn)
  75. result.entry.add('\t' & linkDesc.quoteIndexColumn)
  76. result.entry.add('\t' & $line)
  77. result.entry.add("\n")
  78. proc parseIndexEntryKind(s: string): IndexEntryKind =
  79. result = case s:
  80. of "nim": ieNim
  81. of "nimgrp": ieNimGroup
  82. of "heading": ieHeading
  83. of "idx": ieIdxRole
  84. of "nimTitle": ieNimTitle
  85. of "markupTitle": ieMarkupTitle
  86. else: raise newException(ValueError, "unknown index entry value $1" % [s])
  87. proc parseIdxFile*(path: string):
  88. tuple[fileEntries: seq[IndexEntry], title: IndexEntry] =
  89. var
  90. f = 0
  91. newSeq(result.fileEntries, 500)
  92. setLen(result.fileEntries, 0)
  93. let (_, base, _) = path.splitFile
  94. for line in lines(path):
  95. let s = line.find('\t')
  96. if s < 0: continue
  97. setLen(result.fileEntries, f+1)
  98. let cols = line.split('\t')
  99. result.fileEntries[f].kind = parseIndexEntryKind(cols[0])
  100. result.fileEntries[f].keyword = cols[1]
  101. result.fileEntries[f].link = cols[2]
  102. if result.fileEntries[f].kind == ieIdxRole:
  103. result.fileEntries[f].module = base
  104. else:
  105. if result.title.keyword.len == 0:
  106. result.fileEntries[f].module = base
  107. else:
  108. result.fileEntries[f].module = result.title.keyword
  109. result.fileEntries[f].linkTitle = cols[3].unquoteIndexColumn
  110. result.fileEntries[f].linkDesc = cols[4].unquoteIndexColumn
  111. result.fileEntries[f].line = parseInt(cols[5])
  112. if result.fileEntries[f].kind in {ieNimTitle, ieMarkupTitle}:
  113. result.title = result.fileEntries[f]
  114. inc f
  115. proc cmp*(a, b: IndexEntry): int =
  116. ## Sorts two ``IndexEntry`` first by `keyword` field, then by `link`.
  117. result = cmpIgnoreStyle(a.keyword, b.keyword)
  118. if result == 0:
  119. result = cmpIgnoreStyle(a.link, b.link)
  120. proc hash*(x: IndexEntry): Hash =
  121. ## Returns the hash for the combined fields of the type.
  122. ##
  123. ## The hash is computed as the chained hash of the individual string hashes.
  124. result = x.keyword.hash !& x.link.hash
  125. result = result !& x.linkTitle.hash
  126. result = result !& x.linkDesc.hash
  127. result = !$result