tsimplesort.nim 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. discard """
  2. output: '''true'''
  3. """
  4. import hashes, math
  5. {.pragma: myShallow.}
  6. type
  7. TSlotEnum = enum seEmpty, seFilled, seDeleted
  8. TKeyValuePair[A, B] = tuple[slot: TSlotEnum, key: A, val: B]
  9. TKeyValuePairSeq[A, B] = seq[TKeyValuePair[A, B]]
  10. TTable*[A, B] {.final, myShallow.} = object
  11. data: TKeyValuePairSeq[A, B]
  12. counter: int
  13. proc len*[A, B](t: TTable[A, B]): int =
  14. ## returns the number of keys in `t`.
  15. result = t.counter
  16. iterator pairs*[A, B](t: TTable[A, B]): tuple[key: A, val: B] =
  17. ## iterates over any (key, value) pair in the table `t`.
  18. for h in 0..high(t.data):
  19. if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val)
  20. iterator keys*[A, B](t: TTable[A, B]): A =
  21. ## iterates over any key in the table `t`.
  22. for h in 0..high(t.data):
  23. if t.data[h].slot == seFilled: yield t.data[h].key
  24. iterator values*[A, B](t: TTable[A, B]): B =
  25. ## iterates over any value in the table `t`.
  26. for h in 0..high(t.data):
  27. if t.data[h].slot == seFilled: yield t.data[h].val
  28. const
  29. growthFactor = 2
  30. proc mustRehash(length, counter: int): bool {.inline.} =
  31. assert(length > counter)
  32. result = (length * 2 < counter * 3) or (length - counter < 4)
  33. proc nextTry(h, maxHash: Hash): Hash {.inline.} =
  34. result = ((5 * h) + 1) and maxHash
  35. template rawGetImpl() =
  36. var h: Hash = hash(key) and high(t.data) # start with real hash value
  37. while t.data[h].slot != seEmpty:
  38. if t.data[h].key == key and t.data[h].slot == seFilled:
  39. return h
  40. h = nextTry(h, high(t.data))
  41. result = -1
  42. template rawInsertImpl() =
  43. var h: Hash = hash(key) and high(data)
  44. while data[h].slot == seFilled:
  45. h = nextTry(h, high(data))
  46. data[h].key = key
  47. data[h].val = val
  48. data[h].slot = seFilled
  49. proc rawGet[A, B](t: TTable[A, B], key: A): int =
  50. rawGetImpl()
  51. proc `[]`*[A, B](t: TTable[A, B], key: A): B =
  52. ## retrieves the value at ``t[key]``. If `key` is not in `t`,
  53. ## default empty value for the type `B` is returned
  54. ## and no exception is raised. One can check with ``hasKey`` whether the key
  55. ## exists.
  56. var index = rawGet(t, key)
  57. if index >= 0: result = t.data[index].val
  58. proc hasKey*[A, B](t: TTable[A, B], key: A): bool =
  59. ## returns true iff `key` is in the table `t`.
  60. result = rawGet(t, key) >= 0
  61. proc rawInsert[A, B](t: var TTable[A, B], data: var TKeyValuePairSeq[A, B],
  62. key: A, val: B) =
  63. rawInsertImpl()
  64. proc enlarge[A, B](t: var TTable[A, B]) =
  65. var n: TKeyValuePairSeq[A, B]
  66. newSeq(n, len(t.data) * growthFactor)
  67. for i in countup(0, high(t.data)):
  68. if t.data[i].slot == seFilled: rawInsert(t, n, t.data[i].key, t.data[i].val)
  69. swap(t.data, n)
  70. template putImpl() =
  71. var index = rawGet(t, key)
  72. if index >= 0:
  73. t.data[index].val = val
  74. else:
  75. if mustRehash(len(t.data), t.counter): enlarge(t)
  76. rawInsert(t, t.data, key, val)
  77. inc(t.counter)
  78. proc `[]=`*[A, B](t: var TTable[A, B], key: A, val: B) =
  79. ## puts a (key, value)-pair into `t`.
  80. putImpl()
  81. proc del*[A, B](t: var TTable[A, B], key: A) =
  82. ## deletes `key` from hash table `t`.
  83. var index = rawGet(t, key)
  84. if index >= 0:
  85. t.data[index].slot = seDeleted
  86. dec(t.counter)
  87. proc initTable*[A, B](initialSize=64): TTable[A, B] =
  88. ## creates a new hash table that is empty. `initialSize` needs to be
  89. ## a power of two.
  90. assert isPowerOfTwo(initialSize)
  91. result.counter = 0
  92. newSeq(result.data, initialSize)
  93. proc toTable*[A, B](pairs: openArray[tuple[key: A,
  94. val: B]]): TTable[A, B] =
  95. ## creates a new hash table that contains the given `pairs`.
  96. result = initTable[A, B](nextPowerOfTwo(pairs.len+10))
  97. for key, val in items(pairs): result[key] = val
  98. template dollarImpl(): typed =
  99. if t.len == 0:
  100. result = "{:}"
  101. else:
  102. result = "{"
  103. for key, val in pairs(t):
  104. if result.len > 1: result.add(", ")
  105. result.add($key)
  106. result.add(": ")
  107. result.add($val)
  108. result.add("}")
  109. proc `$`*[A, B](t: TTable[A, B]): string =
  110. ## The `$` operator for hash tables.
  111. dollarImpl()
  112. # ------------------------------ count tables -------------------------------
  113. type
  114. TCountTable*[A] {.final, myShallow.} = object ## table that counts the number of each key
  115. data: seq[tuple[key: A, val: int]]
  116. counter: int
  117. proc len*[A](t: TCountTable[A]): int =
  118. ## returns the number of keys in `t`.
  119. result = t.counter
  120. iterator pairs*[A](t: TCountTable[A]): tuple[key: A, val: int] =
  121. ## iterates over any (key, value) pair in the table `t`.
  122. for h in 0..high(t.data):
  123. if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val)
  124. iterator keys*[A](t: TCountTable[A]): A =
  125. ## iterates over any key in the table `t`.
  126. for h in 0..high(t.data):
  127. if t.data[h].val != 0: yield t.data[h].key
  128. iterator values*[A](t: TCountTable[A]): int =
  129. ## iterates over any value in the table `t`.
  130. for h in 0..high(t.data):
  131. if t.data[h].val != 0: yield t.data[h].val
  132. proc RawGet[A](t: TCountTable[A], key: A): int =
  133. var h: Hash = hash(key) and high(t.data) # start with real hash value
  134. while t.data[h].val != 0:
  135. if t.data[h].key == key: return h
  136. h = nextTry(h, high(t.data))
  137. result = -1
  138. proc `[]`*[A](t: TCountTable[A], key: A): int =
  139. ## retrieves the value at ``t[key]``. If `key` is not in `t`,
  140. ## 0 is returned. One can check with ``hasKey`` whether the key
  141. ## exists.
  142. var index = RawGet(t, key)
  143. if index >= 0: result = t.data[index].val
  144. proc hasKey*[A](t: TCountTable[A], key: A): bool =
  145. ## returns true iff `key` is in the table `t`.
  146. result = rawGet(t, key) >= 0
  147. proc rawInsert[A](t: TCountTable[A], data: var seq[tuple[key: A, val: int]],
  148. key: A, val: int) =
  149. var h: Hash = hash(key) and high(data)
  150. while data[h].val != 0: h = nextTry(h, high(data))
  151. data[h].key = key
  152. data[h].val = val
  153. proc enlarge[A](t: var TCountTable[A]) =
  154. var n: seq[tuple[key: A, val: int]]
  155. newSeq(n, len(t.data) * growthFactor)
  156. for i in countup(0, high(t.data)):
  157. if t.data[i].val != 0: rawInsert(t, n, t.data[i].key, t.data[i].val)
  158. swap(t.data, n)
  159. proc `[]=`*[A](t: var TCountTable[A], key: A, val: int) =
  160. ## puts a (key, value)-pair into `t`. `val` has to be positive.
  161. assert val > 0
  162. putImpl()
  163. proc initCountTable*[A](initialSize=64): TCountTable[A] =
  164. ## creates a new count table that is empty. `initialSize` needs to be
  165. ## a power of two.
  166. assert isPowerOfTwo(initialSize)
  167. result.counter = 0
  168. newSeq(result.data, initialSize)
  169. proc toCountTable*[A](keys: openArray[A]): TCountTable[A] =
  170. ## creates a new count table with every key in `keys` having a count of 1.
  171. result = initCountTable[A](nextPowerOfTwo(keys.len+10))
  172. for key in items(keys): result[key] = 1
  173. proc `$`*[A](t: TCountTable[A]): string =
  174. ## The `$` operator for count tables.
  175. dollarImpl()
  176. proc inc*[A](t: var TCountTable[A], key: A, val = 1) =
  177. ## increments `t[key]` by `val`.
  178. var index = RawGet(t, key)
  179. if index >= 0:
  180. inc(t.data[index].val, val)
  181. else:
  182. if mustRehash(len(t.data), t.counter): enlarge(t)
  183. rawInsert(t, t.data, key, val)
  184. inc(t.counter)
  185. proc smallest*[A](t: TCountTable[A]): tuple[key: A, val: int] =
  186. ## returns the largest (key,val)-pair. Efficiency: O(n)
  187. assert t.len > 0
  188. var minIdx = 0
  189. for h in 1..high(t.data):
  190. if t.data[h].val > 0 and t.data[minIdx].val > t.data[h].val: minIdx = h
  191. result.key = t.data[minIdx].key
  192. result.val = t.data[minIdx].val
  193. proc largest*[A](t: TCountTable[A]): tuple[key: A, val: int] =
  194. ## returns the (key,val)-pair with the largest `val`. Efficiency: O(n)
  195. assert t.len > 0
  196. var maxIdx = 0
  197. for h in 1..high(t.data):
  198. if t.data[maxIdx].val < t.data[h].val: maxIdx = h
  199. result.key = t.data[maxIdx].key
  200. result.val = t.data[maxIdx].val
  201. proc sort*[A](t: var TCountTable[A]) =
  202. ## sorts the count table so that the entry with the highest counter comes
  203. ## first. This is destructive! You must not modify `t` afterwards!
  204. ## You can use the iterators `pairs`, `keys`, and `values` to iterate over
  205. ## `t` in the sorted order.
  206. # we use shellsort here; fast enough and simple
  207. var h = 1
  208. while true:
  209. h = 3 * h + 1
  210. if h >= high(t.data): break
  211. while true:
  212. h = h div 3
  213. for i in countup(h, high(t.data)):
  214. var j = i
  215. while t.data[j-h].val <= t.data[j].val:
  216. var xyz = t.data[j]
  217. t.data[j] = t.data[j-h]
  218. t.data[j-h] = xyz
  219. j = j-h
  220. if j < h: break
  221. if h == 1: break
  222. const
  223. data = {
  224. "34": 123456, "12": 789,
  225. "90": 343, "0": 34404,
  226. "1": 344004, "2": 344774,
  227. "3": 342244, "4": 3412344,
  228. "5": 341232144, "6": 34214544,
  229. "7": 3434544, "8": 344544,
  230. "9": 34435644, "---00": 346677844,
  231. "10": 34484, "11": 34474, "19": 34464,
  232. "20": 34454, "30": 34141244, "40": 344114,
  233. "50": 344490, "60": 344491, "70": 344492,
  234. "80": 344497}
  235. proc countTableTest1 =
  236. var s = initTable[string, int](64)
  237. for key, val in items(data): s[key] = val
  238. var w: tuple[key: string, val: int] #type(otherCountTable.data[0])
  239. var t = initCountTable[string]()
  240. for k, v in items(data): t.inc(k)
  241. for k in t.keys: assert t[k] == 1
  242. t.inc("90", 3)
  243. t.inc("12", 2)
  244. t.inc("34", 1)
  245. assert t.largest()[0] == "90"
  246. t.sort()
  247. var i = 0
  248. for k, v in t.pairs:
  249. case i
  250. of 0: assert k == "90" and v == 4
  251. of 1: assert k == "12" and v == 3
  252. of 2: assert k == "34" and v == 2
  253. else: break
  254. inc i
  255. countTableTest1()
  256. echo true