strtabs.nim 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## The ``strtabs`` module implements an efficient hash table that is a mapping
  10. ## from strings to strings. Supports a case-sensitive, case-insensitive and
  11. ## style-insensitive mode.
  12. runnableExamples:
  13. var t = newStringTable()
  14. t["name"] = "John"
  15. t["city"] = "Monaco"
  16. doAssert t.len == 2
  17. doAssert t.hasKey "name"
  18. doAssert "name" in t
  19. ## String tables can be created from a table constructor:
  20. runnableExamples:
  21. var t = {"name": "John", "city": "Monaco"}.newStringTable
  22. ## When using the style insensitive mode (``modeStyleInsensitive``),
  23. ## all letters are compared case insensitively within the ASCII range
  24. ## and underscores are ignored.
  25. runnableExamples:
  26. var x = newStringTable(modeStyleInsensitive)
  27. x["first_name"] = "John"
  28. x["LastName"] = "Doe"
  29. doAssert x["firstName"] == "John"
  30. doAssert x["last_name"] == "Doe"
  31. ## An efficient string substitution operator
  32. ## `% <#%25,string,StringTableRef,set[FormatFlag]>`_ for the string table
  33. ## is also provided.
  34. runnableExamples:
  35. var t = {"name": "John", "city": "Monaco"}.newStringTable
  36. doAssert "${name} lives in ${city}" % t == "John lives in Monaco"
  37. ## **See also:**
  38. ## * `tables module <tables.html>`_ for general hash tables
  39. ## * `sharedtables module<sharedtables.html>`_ for shared hash table support
  40. ## * `strutils module<strutils.html>`_ for common string functions
  41. ## * `json module<json.html>`_ for table-like structure which allows
  42. ## heterogeneous members
  43. import std/private/since
  44. import
  45. hashes, strutils
  46. when defined(nimPreviewSlimSystem):
  47. import std/assertions
  48. when defined(js) or defined(nimscript) or defined(Standalone):
  49. {.pragma: rtlFunc.}
  50. else:
  51. {.pragma: rtlFunc, rtl.}
  52. import std/envvars
  53. include "system/inclrtl"
  54. type
  55. StringTableMode* = enum ## Describes the tables operation mode.
  56. modeCaseSensitive, ## the table is case sensitive
  57. modeCaseInsensitive, ## the table is case insensitive
  58. modeStyleInsensitive ## the table is style insensitive
  59. KeyValuePair = tuple[key, val: string, hasValue: bool]
  60. KeyValuePairSeq = seq[KeyValuePair]
  61. StringTableObj* = object of RootObj
  62. counter: int
  63. data: KeyValuePairSeq
  64. mode: StringTableMode
  65. StringTableRef* = ref StringTableObj
  66. FormatFlag* = enum ## Flags for the `%` operator.
  67. useEnvironment, ## Use environment variable if the ``$key``
  68. ## is not found in the table.
  69. ## Does nothing when using `js` target.
  70. useEmpty, ## Use the empty string as a default, thus it
  71. ## won't throw an exception if ``$key`` is not
  72. ## in the table.
  73. useKey ## Do not replace ``$key`` if it is not found
  74. ## in the table (or in the environment).
  75. const
  76. growthFactor = 2
  77. startSize = 64
  78. proc mode*(t: StringTableRef): StringTableMode {.inline.} = t.mode
  79. iterator pairs*(t: StringTableRef): tuple[key, value: string] =
  80. ## Iterates over every `(key, value)` pair in the table `t`.
  81. for h in 0..high(t.data):
  82. if t.data[h].hasValue:
  83. yield (t.data[h].key, t.data[h].val)
  84. iterator keys*(t: StringTableRef): string =
  85. ## Iterates over every key in the table `t`.
  86. for h in 0..high(t.data):
  87. if t.data[h].hasValue:
  88. yield t.data[h].key
  89. iterator values*(t: StringTableRef): string =
  90. ## Iterates over every value in the table `t`.
  91. for h in 0..high(t.data):
  92. if t.data[h].hasValue:
  93. yield t.data[h].val
  94. proc myhash(t: StringTableRef, key: string): Hash =
  95. case t.mode
  96. of modeCaseSensitive: result = hashes.hash(key)
  97. of modeCaseInsensitive: result = hashes.hashIgnoreCase(key)
  98. of modeStyleInsensitive: result = hashes.hashIgnoreStyle(key)
  99. proc myCmp(t: StringTableRef, a, b: string): bool =
  100. case t.mode
  101. of modeCaseSensitive: result = cmp(a, b) == 0
  102. of modeCaseInsensitive: result = cmpIgnoreCase(a, b) == 0
  103. of modeStyleInsensitive: result = cmpIgnoreStyle(a, b) == 0
  104. proc mustRehash(length, counter: int): bool =
  105. assert(length > counter)
  106. result = (length * 2 < counter * 3) or (length - counter < 4)
  107. proc nextTry(h, maxHash: Hash): Hash {.inline.} =
  108. result = (h + 1) and maxHash
  109. proc rawGet(t: StringTableRef, key: string): int =
  110. var h: Hash = myhash(t, key) and high(t.data) # start with real hash value
  111. while t.data[h].hasValue:
  112. if myCmp(t, t.data[h].key, key):
  113. return h
  114. h = nextTry(h, high(t.data))
  115. result = - 1
  116. template get(t: StringTableRef, key: string) =
  117. var index = rawGet(t, key)
  118. if index >= 0: result = t.data[index].val
  119. else:
  120. raise newException(KeyError, "key not found: " & key)
  121. proc len*(t: StringTableRef): int {.rtlFunc, extern: "nst$1".} =
  122. ## Returns the number of keys in `t`.
  123. result = t.counter
  124. proc `[]`*(t: StringTableRef, key: string): var string {.
  125. rtlFunc, extern: "nstTake".} =
  126. ## Retrieves the location at ``t[key]``.
  127. ##
  128. ## If `key` is not in `t`, the ``KeyError`` exception is raised.
  129. ## One can check with `hasKey proc <#hasKey,StringTableRef,string>`_
  130. ## whether the key exists.
  131. ##
  132. ## See also:
  133. ## * `getOrDefault proc <#getOrDefault,StringTableRef,string,string>`_
  134. ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new
  135. ## (key, value) pair in the table
  136. ## * `hasKey proc <#hasKey,StringTableRef,string>`_ for checking if a key
  137. ## is in the table
  138. runnableExamples:
  139. var t = {"name": "John", "city": "Monaco"}.newStringTable
  140. doAssert t["name"] == "John"
  141. doAssertRaises(KeyError):
  142. echo t["occupation"]
  143. get(t, key)
  144. proc getOrDefault*(t: StringTableRef; key: string,
  145. default: string = ""): string =
  146. ## Retrieves the location at ``t[key]``.
  147. ##
  148. ## If `key` is not in `t`, the default value is returned (if not specified,
  149. ## it is an empty string (`""`)).
  150. ##
  151. ## See also:
  152. ## * `[] proc <#[],StringTableRef,string>`_ for retrieving a value of a key
  153. ## * `hasKey proc <#hasKey,StringTableRef,string>`_ for checking if a key
  154. ## is in the table
  155. ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new
  156. ## (key, value) pair in the table
  157. runnableExamples:
  158. var t = {"name": "John", "city": "Monaco"}.newStringTable
  159. doAssert t.getOrDefault("name") == "John"
  160. doAssert t.getOrDefault("occupation") == ""
  161. doAssert t.getOrDefault("occupation", "teacher") == "teacher"
  162. doAssert t.getOrDefault("name", "Paul") == "John"
  163. var index = rawGet(t, key)
  164. if index >= 0: result = t.data[index].val
  165. else: result = default
  166. proc hasKey*(t: StringTableRef, key: string): bool {.rtlFunc,
  167. extern: "nst$1".} =
  168. ## Returns true if `key` is in the table `t`.
  169. ##
  170. ## See also:
  171. ## * `getOrDefault proc <#getOrDefault,StringTableRef,string,string>`_
  172. ## * `contains proc <#contains,StringTableRef,string>`_
  173. runnableExamples:
  174. var t = {"name": "John", "city": "Monaco"}.newStringTable
  175. doAssert t.hasKey("name")
  176. doAssert not t.hasKey("occupation")
  177. result = rawGet(t, key) >= 0
  178. proc contains*(t: StringTableRef, key: string): bool =
  179. ## Alias of `hasKey proc <#hasKey,StringTableRef,string>`_ for use with
  180. ## the `in` operator.
  181. runnableExamples:
  182. var t = {"name": "John", "city": "Monaco"}.newStringTable
  183. doAssert "name" in t
  184. doAssert "occupation" notin t
  185. return hasKey(t, key)
  186. proc rawInsert(t: StringTableRef, data: var KeyValuePairSeq, key, val: string) =
  187. var h: Hash = myhash(t, key) and high(data)
  188. while data[h].hasValue:
  189. h = nextTry(h, high(data))
  190. data[h].key = key
  191. data[h].val = val
  192. data[h].hasValue = true
  193. proc enlarge(t: StringTableRef) =
  194. var n: KeyValuePairSeq
  195. newSeq(n, len(t.data) * growthFactor)
  196. for i in countup(0, high(t.data)):
  197. if t.data[i].hasValue: rawInsert(t, n, move t.data[i].key, move t.data[i].val)
  198. swap(t.data, n)
  199. proc `[]=`*(t: StringTableRef, key, val: string) {.
  200. rtlFunc, extern: "nstPut".} =
  201. ## Inserts a `(key, value)` pair into `t`.
  202. ##
  203. ## See also:
  204. ## * `[] proc <#[],StringTableRef,string>`_ for retrieving a value of a key
  205. ## * `del proc <#del,StringTableRef,string>`_ for removing a key from the table
  206. runnableExamples:
  207. var t = {"name": "John", "city": "Monaco"}.newStringTable
  208. t["occupation"] = "teacher"
  209. doAssert t.hasKey("occupation")
  210. var index = rawGet(t, key)
  211. if index >= 0:
  212. t.data[index].val = val
  213. else:
  214. if mustRehash(len(t.data), t.counter): enlarge(t)
  215. rawInsert(t, t.data, key, val)
  216. inc(t.counter)
  217. proc newStringTable*(mode: StringTableMode): owned(StringTableRef) {.
  218. rtlFunc, extern: "nst$1", noSideEffect.} =
  219. ## Creates a new empty string table.
  220. ##
  221. ## See also:
  222. ## * `newStringTable(keyValuePairs) proc
  223. ## <#newStringTable,varargs[tuple[string,string]],StringTableMode>`_
  224. result = StringTableRef(mode: mode, counter: 0, data: newSeq[KeyValuePair](startSize))
  225. proc newStringTable*(keyValuePairs: varargs[string],
  226. mode: StringTableMode): owned(StringTableRef) {.
  227. rtlFunc, extern: "nst$1WithPairs", noSideEffect.} =
  228. ## Creates a new string table with given `key, value` string pairs.
  229. ##
  230. ## `StringTableMode` must be specified.
  231. runnableExamples:
  232. var mytab = newStringTable("key1", "val1", "key2", "val2",
  233. modeCaseInsensitive)
  234. result = newStringTable(mode)
  235. var i = 0
  236. while i < high(keyValuePairs):
  237. {.noSideEffect.}:
  238. result[keyValuePairs[i]] = keyValuePairs[i + 1]
  239. inc(i, 2)
  240. proc newStringTable*(keyValuePairs: varargs[tuple[key, val: string]],
  241. mode: StringTableMode = modeCaseSensitive): owned(StringTableRef) {.
  242. rtlFunc, extern: "nst$1WithTableConstr", noSideEffect.} =
  243. ## Creates a new string table with given `(key, value)` tuple pairs.
  244. ##
  245. ## The default mode is case sensitive.
  246. runnableExamples:
  247. var
  248. mytab1 = newStringTable({"key1": "val1", "key2": "val2"}, modeCaseInsensitive)
  249. mytab2 = newStringTable([("key3", "val3"), ("key4", "val4")])
  250. result = newStringTable(mode)
  251. for key, val in items(keyValuePairs):
  252. {.noSideEffect.}:
  253. result[key] = val
  254. proc raiseFormatException(s: string) =
  255. raise newException(ValueError, "format string: key not found: " & s)
  256. proc getValue(t: StringTableRef, flags: set[FormatFlag], key: string): string =
  257. if hasKey(t, key): return t.getOrDefault(key)
  258. when defined(js) or defined(nimscript) or defined(Standalone):
  259. result = ""
  260. else:
  261. if useEnvironment in flags: result = getEnv(key)
  262. else: result = ""
  263. if result.len == 0:
  264. if useKey in flags: result = '$' & key
  265. elif useEmpty notin flags: raiseFormatException(key)
  266. proc clear*(s: StringTableRef, mode: StringTableMode) {.
  267. rtlFunc, extern: "nst$1".} =
  268. ## Resets a string table to be empty again, perhaps altering the mode.
  269. ##
  270. ## See also:
  271. ## * `del proc <#del,StringTableRef,string>`_ for removing a key from the table
  272. runnableExamples:
  273. var t = {"name": "John", "city": "Monaco"}.newStringTable
  274. clear(t, modeCaseSensitive)
  275. doAssert len(t) == 0
  276. doAssert "name" notin t
  277. doAssert "city" notin t
  278. s.mode = mode
  279. s.counter = 0
  280. s.data.setLen(startSize)
  281. for i in 0..<s.data.len:
  282. s.data[i].hasValue = false
  283. proc clear*(s: StringTableRef) {.since: (1, 1).} =
  284. ## Resets a string table to be empty again without changing the mode.
  285. s.clear(s.mode)
  286. proc del*(t: StringTableRef, key: string) =
  287. ## Removes `key` from `t`.
  288. ##
  289. ## See also:
  290. ## * `clear proc <#clear,StringTableRef,StringTableMode>`_ for resetting a
  291. ## table to be empty
  292. ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new
  293. ## (key, value) pair in the table
  294. runnableExamples:
  295. var t = {"name": "John", "city": "Monaco"}.newStringTable
  296. t.del("name")
  297. doAssert len(t) == 1
  298. doAssert "name" notin t
  299. doAssert "city" in t
  300. # Impl adapted from `tableimpl.delImplIdx`
  301. var i = rawGet(t, key)
  302. let msk = high(t.data)
  303. if i >= 0:
  304. dec(t.counter)
  305. block outer:
  306. while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
  307. var j = i # The correctness of this depends on (h+1) in nextTry,
  308. var r = j # though may be adaptable to other simple sequences.
  309. t.data[i].hasValue = false # mark current EMPTY
  310. t.data[i].key = ""
  311. t.data[i].val = ""
  312. while true:
  313. i = (i + 1) and msk # increment mod table size
  314. if not t.data[i].hasValue: # end of collision cluster; So all done
  315. break outer
  316. r = t.myhash(t.data[i].key) and msk # "home" location of key@i
  317. if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
  318. break
  319. when defined(js):
  320. t.data[j] = t.data[i]
  321. elif defined(gcDestructors):
  322. t.data[j] = move t.data[i]
  323. else:
  324. shallowCopy(t.data[j], t.data[i]) # data[j] will be marked EMPTY next loop
  325. proc `$`*(t: StringTableRef): string {.rtlFunc, extern: "nstDollar".} =
  326. ## The `$` operator for string tables. Used internally when calling
  327. ## `echo` on a table.
  328. if t.len == 0:
  329. result = "{:}"
  330. else:
  331. result = "{"
  332. for key, val in pairs(t):
  333. if result.len > 1: result.add(", ")
  334. result.add(key)
  335. result.add(": ")
  336. result.add(val)
  337. result.add("}")
  338. proc `%`*(f: string, t: StringTableRef, flags: set[FormatFlag] = {}): string {.
  339. rtlFunc, extern: "nstFormat".} =
  340. ## The `%` operator for string tables.
  341. runnableExamples:
  342. var t = {"name": "John", "city": "Monaco"}.newStringTable
  343. doAssert "${name} lives in ${city}" % t == "John lives in Monaco"
  344. const
  345. PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'}
  346. result = ""
  347. var i = 0
  348. while i < len(f):
  349. if f[i] == '$':
  350. case f[i+1]
  351. of '$':
  352. add(result, '$')
  353. inc(i, 2)
  354. of '{':
  355. var j = i + 1
  356. while j < f.len and f[j] != '}': inc(j)
  357. add(result, getValue(t, flags, substr(f, i+2, j-1)))
  358. i = j + 1
  359. of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '_':
  360. var j = i + 1
  361. while j < f.len and f[j] in PatternChars: inc(j)
  362. add(result, getValue(t, flags, substr(f, i+1, j-1)))
  363. i = j
  364. else:
  365. add(result, f[i])
  366. inc(i)
  367. else:
  368. add(result, f[i])
  369. inc(i)