bitabs.nim 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. ## A BiTable is a table that can be seen as an optimized pair
  2. ## of `(Table[LitId, Val], Table[Val, LitId])`.
  3. import std/hashes
  4. import rodfiles
  5. when defined(nimPreviewSlimSystem):
  6. import std/assertions
  7. type
  8. LitId* = distinct uint32
  9. BiTable*[T] = object
  10. vals: seq[T] # indexed by LitId
  11. keys: seq[LitId] # indexed by hash(val)
  12. proc initBiTable*[T](): BiTable[T] = BiTable[T](vals: @[], keys: @[])
  13. proc nextTry(h, maxHash: Hash): Hash {.inline.} =
  14. result = (h + 1) and maxHash
  15. template maxHash(t): untyped = high(t.keys)
  16. template isFilled(x: LitId): bool = x.uint32 > 0'u32
  17. proc `$`*(x: LitId): string {.borrow.}
  18. proc `<`*(x, y: LitId): bool {.borrow.}
  19. proc `<=`*(x, y: LitId): bool {.borrow.}
  20. proc `==`*(x, y: LitId): bool {.borrow.}
  21. proc hash*(x: LitId): Hash {.borrow.}
  22. proc len*[T](t: BiTable[T]): int = t.vals.len
  23. proc mustRehash(length, counter: int): bool {.inline.} =
  24. assert(length > counter)
  25. result = (length * 2 < counter * 3) or (length - counter < 4)
  26. const
  27. idStart = 1
  28. template idToIdx(x: LitId): int = x.int - idStart
  29. proc hasLitId*[T](t: BiTable[T]; x: LitId): bool =
  30. let idx = idToIdx(x)
  31. result = idx >= 0 and idx < t.vals.len
  32. proc enlarge[T](t: var BiTable[T]) =
  33. var n: seq[LitId]
  34. newSeq(n, len(t.keys) * 2)
  35. swap(t.keys, n)
  36. for i in 0..high(n):
  37. let eh = n[i]
  38. if isFilled(eh):
  39. var j = hash(t.vals[idToIdx eh]) and maxHash(t)
  40. while isFilled(t.keys[j]):
  41. j = nextTry(j, maxHash(t))
  42. t.keys[j] = move n[i]
  43. proc getKeyId*[T](t: BiTable[T]; v: T): LitId =
  44. let origH = hash(v)
  45. var h = origH and maxHash(t)
  46. if t.keys.len != 0:
  47. while true:
  48. let litId = t.keys[h]
  49. if not isFilled(litId): break
  50. if t.vals[idToIdx t.keys[h]] == v: return litId
  51. h = nextTry(h, maxHash(t))
  52. return LitId(0)
  53. proc getOrIncl*[T](t: var BiTable[T]; v: T): LitId =
  54. let origH = hash(v)
  55. var h = origH and maxHash(t)
  56. if t.keys.len != 0:
  57. while true:
  58. let litId = t.keys[h]
  59. if not isFilled(litId): break
  60. if t.vals[idToIdx t.keys[h]] == v: return litId
  61. h = nextTry(h, maxHash(t))
  62. # not found, we need to insert it:
  63. if mustRehash(t.keys.len, t.vals.len):
  64. enlarge(t)
  65. # recompute where to insert:
  66. h = origH and maxHash(t)
  67. while true:
  68. let litId = t.keys[h]
  69. if not isFilled(litId): break
  70. h = nextTry(h, maxHash(t))
  71. else:
  72. setLen(t.keys, 16)
  73. h = origH and maxHash(t)
  74. result = LitId(t.vals.len + idStart)
  75. t.keys[h] = result
  76. t.vals.add v
  77. proc `[]`*[T](t: var BiTable[T]; litId: LitId): var T {.inline.} =
  78. let idx = idToIdx litId
  79. assert idx < t.vals.len
  80. result = t.vals[idx]
  81. proc `[]`*[T](t: BiTable[T]; litId: LitId): lent T {.inline.} =
  82. let idx = idToIdx litId
  83. assert idx < t.vals.len
  84. result = t.vals[idx]
  85. proc hash*[T](t: BiTable[T]): Hash =
  86. ## as the keys are hashes of the values, we simply use them instead
  87. var h: Hash = 0
  88. for i, n in pairs t.keys:
  89. h = h !& hash((i, n))
  90. result = !$h
  91. proc store*[T](f: var RodFile; t: BiTable[T]) =
  92. storeSeq(f, t.vals)
  93. storeSeq(f, t.keys)
  94. proc load*[T](f: var RodFile; t: var BiTable[T]) =
  95. loadSeq(f, t.vals)
  96. loadSeq(f, t.keys)
  97. proc sizeOnDisc*(t: BiTable[string]): int =
  98. result = 4
  99. for x in t.vals:
  100. result += x.len + 4
  101. result += t.keys.len * sizeof(LitId)
  102. when isMainModule:
  103. var t: BiTable[string]
  104. echo getOrIncl(t, "hello")
  105. echo getOrIncl(t, "hello")
  106. echo getOrIncl(t, "hello3")
  107. echo getOrIncl(t, "hello4")
  108. echo getOrIncl(t, "helloasfasdfdsa")
  109. echo getOrIncl(t, "hello")
  110. echo getKeyId(t, "hello")
  111. echo getKeyId(t, "none")
  112. for i in 0 ..< 100_000:
  113. discard t.getOrIncl($i & "___" & $i)
  114. for i in 0 ..< 100_000:
  115. assert t.getOrIncl($i & "___" & $i).idToIdx == i + 4
  116. echo "begin"
  117. echo t.vals.len
  118. echo t.vals[0]
  119. echo t.vals[1004]
  120. echo "middle"
  121. var tf: BiTable[float]
  122. discard tf.getOrIncl(0.4)
  123. discard tf.getOrIncl(16.4)
  124. discard tf.getOrIncl(32.4)
  125. echo getKeyId(tf, 32.4)
  126. var f2 = open("testblah.bin", fmWrite)
  127. echo store(f2, tf)
  128. f2.close
  129. var f1 = open("testblah.bin", fmRead)
  130. var t2: BiTable[float]
  131. echo f1.load(t2)
  132. echo t2.vals.len
  133. echo getKeyId(t2, 32.4)
  134. echo "end"
  135. f1.close