bitabs.nim 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. ## A BiTable is a table that can be seen as an optimized pair
  2. ## of (Table[LitId, Val], Table[Val, LitId]).
  3. import hashes, rodfiles
  4. type
  5. LitId* = distinct uint32
  6. BiTable*[T] = object
  7. vals: seq[T] # indexed by LitId
  8. keys: seq[LitId] # indexed by hash(val)
  9. proc nextTry(h, maxHash: Hash): Hash {.inline.} =
  10. result = (h + 1) and maxHash
  11. template maxHash(t): untyped = high(t.keys)
  12. template isFilled(x: LitId): bool = x.uint32 > 0'u32
  13. proc `$`*(x: LitId): string {.borrow.}
  14. proc `<`*(x, y: LitId): bool {.borrow.}
  15. proc `<=`*(x, y: LitId): bool {.borrow.}
  16. proc `==`*(x, y: LitId): bool {.borrow.}
  17. proc hash*(x: LitId): Hash {.borrow.}
  18. proc len*[T](t: BiTable[T]): int = t.vals.len
  19. proc mustRehash(length, counter: int): bool {.inline.} =
  20. assert(length > counter)
  21. result = (length * 2 < counter * 3) or (length - counter < 4)
  22. const
  23. idStart = 256 ##
  24. ## Ids do not start with 0 but with this value. The IR needs it.
  25. ## TODO: explain why
  26. template idToIdx(x: LitId): int = x.int - idStart
  27. proc hasLitId*[T](t: BiTable[T]; x: LitId): bool =
  28. let idx = idToIdx(x)
  29. result = idx >= 0 and idx < t.vals.len
  30. proc enlarge[T](t: var BiTable[T]) =
  31. var n: seq[LitId]
  32. newSeq(n, len(t.keys) * 2)
  33. swap(t.keys, n)
  34. for i in 0..high(n):
  35. let eh = n[i]
  36. if isFilled(eh):
  37. var j = hash(t.vals[idToIdx eh]) and maxHash(t)
  38. while isFilled(t.keys[j]):
  39. j = nextTry(j, maxHash(t))
  40. t.keys[j] = move n[i]
  41. proc getKeyId*[T](t: BiTable[T]; v: T): LitId =
  42. let origH = hash(v)
  43. var h = origH and maxHash(t)
  44. if t.keys.len != 0:
  45. while true:
  46. let litId = t.keys[h]
  47. if not isFilled(litId): break
  48. if t.vals[idToIdx t.keys[h]] == v: return litId
  49. h = nextTry(h, maxHash(t))
  50. return LitId(0)
  51. proc getOrIncl*[T](t: var BiTable[T]; v: T): LitId =
  52. let origH = hash(v)
  53. var h = origH and maxHash(t)
  54. if t.keys.len != 0:
  55. while true:
  56. let litId = t.keys[h]
  57. if not isFilled(litId): break
  58. if t.vals[idToIdx t.keys[h]] == v: return litId
  59. h = nextTry(h, maxHash(t))
  60. # not found, we need to insert it:
  61. if mustRehash(t.keys.len, t.vals.len):
  62. enlarge(t)
  63. # recompute where to insert:
  64. h = origH and maxHash(t)
  65. while true:
  66. let litId = t.keys[h]
  67. if not isFilled(litId): break
  68. h = nextTry(h, maxHash(t))
  69. else:
  70. setLen(t.keys, 16)
  71. h = origH and maxHash(t)
  72. result = LitId(t.vals.len + idStart)
  73. t.keys[h] = result
  74. t.vals.add v
  75. proc `[]`*[T](t: var BiTable[T]; litId: LitId): var T {.inline.} =
  76. let idx = idToIdx litId
  77. assert idx < t.vals.len
  78. result = t.vals[idx]
  79. proc `[]`*[T](t: BiTable[T]; litId: LitId): lent T {.inline.} =
  80. let idx = idToIdx litId
  81. assert idx < t.vals.len
  82. result = t.vals[idx]
  83. proc hash*[T](t: BiTable[T]): Hash =
  84. ## as the keys are hashes of the values, we simply use them instead
  85. var h: Hash = 0
  86. for i, n in pairs t.keys:
  87. h = h !& hash((i, n))
  88. result = !$h
  89. proc store*[T](f: var RodFile; t: BiTable[T]) =
  90. storeSeq(f, t.vals)
  91. storeSeq(f, t.keys)
  92. proc load*[T](f: var RodFile; t: var BiTable[T]) =
  93. loadSeq(f, t.vals)
  94. loadSeq(f, t.keys)
  95. when isMainModule:
  96. var t: BiTable[string]
  97. echo getOrIncl(t, "hello")
  98. echo getOrIncl(t, "hello")
  99. echo getOrIncl(t, "hello3")
  100. echo getOrIncl(t, "hello4")
  101. echo getOrIncl(t, "helloasfasdfdsa")
  102. echo getOrIncl(t, "hello")
  103. echo getKeyId(t, "hello")
  104. echo getKeyId(t, "none")
  105. for i in 0 ..< 100_000:
  106. discard t.getOrIncl($i & "___" & $i)
  107. for i in 0 ..< 100_000:
  108. assert t.getOrIncl($i & "___" & $i).idToIdx == i + 4
  109. echo "begin"
  110. echo t.vals.len
  111. echo t.vals[0]
  112. echo t.vals[1004]
  113. echo "middle"
  114. var tf: BiTable[float]
  115. discard tf.getOrIncl(0.4)
  116. discard tf.getOrIncl(16.4)
  117. discard tf.getOrIncl(32.4)
  118. echo getKeyId(tf, 32.4)
  119. var f2 = open("testblah.bin", fmWrite)
  120. echo store(f2, tf)
  121. f2.close
  122. var f1 = open("testblah.bin", fmRead)
  123. var t2: BiTable[float]
  124. echo f1.load(t2)
  125. echo t2.vals.len
  126. echo getKeyId(t2, 32.4)
  127. echo "end"
  128. f1.close