bitabs.nim 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. ## A BiTable is a table that can be seen as an optimized pair
  2. ## of `(Table[LitId, Val], Table[Val, LitId])`.
  3. import hashes, rodfiles
  4. when defined(nimPreviewSlimSystem):
  5. import std/assertions
  6. type
  7. LitId* = distinct uint32
  8. BiTable*[T] = object
  9. vals: seq[T] # indexed by LitId
  10. keys: seq[LitId] # indexed by hash(val)
  11. proc nextTry(h, maxHash: Hash): Hash {.inline.} =
  12. result = (h + 1) and maxHash
  13. template maxHash(t): untyped = high(t.keys)
  14. template isFilled(x: LitId): bool = x.uint32 > 0'u32
  15. proc `$`*(x: LitId): string {.borrow.}
  16. proc `<`*(x, y: LitId): bool {.borrow.}
  17. proc `<=`*(x, y: LitId): bool {.borrow.}
  18. proc `==`*(x, y: LitId): bool {.borrow.}
  19. proc hash*(x: LitId): Hash {.borrow.}
  20. proc len*[T](t: BiTable[T]): int = t.vals.len
  21. proc mustRehash(length, counter: int): bool {.inline.} =
  22. assert(length > counter)
  23. result = (length * 2 < counter * 3) or (length - counter < 4)
  24. const
  25. idStart = 256 ##
  26. ## Ids do not start with 0 but with this value. The IR needs it.
  27. ## TODO: explain why
  28. template idToIdx(x: LitId): int = x.int - idStart
  29. proc hasLitId*[T](t: BiTable[T]; x: LitId): bool =
  30. let idx = idToIdx(x)
  31. result = idx >= 0 and idx < t.vals.len
  32. proc enlarge[T](t: var BiTable[T]) =
  33. var n: seq[LitId]
  34. newSeq(n, len(t.keys) * 2)
  35. swap(t.keys, n)
  36. for i in 0..high(n):
  37. let eh = n[i]
  38. if isFilled(eh):
  39. var j = hash(t.vals[idToIdx eh]) and maxHash(t)
  40. while isFilled(t.keys[j]):
  41. j = nextTry(j, maxHash(t))
  42. t.keys[j] = move n[i]
  43. proc getKeyId*[T](t: BiTable[T]; v: T): LitId =
  44. let origH = hash(v)
  45. var h = origH and maxHash(t)
  46. if t.keys.len != 0:
  47. while true:
  48. let litId = t.keys[h]
  49. if not isFilled(litId): break
  50. if t.vals[idToIdx t.keys[h]] == v: return litId
  51. h = nextTry(h, maxHash(t))
  52. return LitId(0)
  53. proc getOrIncl*[T](t: var BiTable[T]; v: T): LitId =
  54. let origH = hash(v)
  55. var h = origH and maxHash(t)
  56. if t.keys.len != 0:
  57. while true:
  58. let litId = t.keys[h]
  59. if not isFilled(litId): break
  60. if t.vals[idToIdx t.keys[h]] == v: return litId
  61. h = nextTry(h, maxHash(t))
  62. # not found, we need to insert it:
  63. if mustRehash(t.keys.len, t.vals.len):
  64. enlarge(t)
  65. # recompute where to insert:
  66. h = origH and maxHash(t)
  67. while true:
  68. let litId = t.keys[h]
  69. if not isFilled(litId): break
  70. h = nextTry(h, maxHash(t))
  71. else:
  72. setLen(t.keys, 16)
  73. h = origH and maxHash(t)
  74. result = LitId(t.vals.len + idStart)
  75. t.keys[h] = result
  76. t.vals.add v
  77. proc `[]`*[T](t: var BiTable[T]; litId: LitId): var T {.inline.} =
  78. let idx = idToIdx litId
  79. assert idx < t.vals.len
  80. result = t.vals[idx]
  81. proc `[]`*[T](t: BiTable[T]; litId: LitId): lent T {.inline.} =
  82. let idx = idToIdx litId
  83. assert idx < t.vals.len
  84. result = t.vals[idx]
  85. proc hash*[T](t: BiTable[T]): Hash =
  86. ## as the keys are hashes of the values, we simply use them instead
  87. var h: Hash = 0
  88. for i, n in pairs t.keys:
  89. h = h !& hash((i, n))
  90. result = !$h
  91. proc store*[T](f: var RodFile; t: BiTable[T]) =
  92. storeSeq(f, t.vals)
  93. storeSeq(f, t.keys)
  94. proc load*[T](f: var RodFile; t: var BiTable[T]) =
  95. loadSeq(f, t.vals)
  96. loadSeq(f, t.keys)
  97. when isMainModule:
  98. var t: BiTable[string]
  99. echo getOrIncl(t, "hello")
  100. echo getOrIncl(t, "hello")
  101. echo getOrIncl(t, "hello3")
  102. echo getOrIncl(t, "hello4")
  103. echo getOrIncl(t, "helloasfasdfdsa")
  104. echo getOrIncl(t, "hello")
  105. echo getKeyId(t, "hello")
  106. echo getKeyId(t, "none")
  107. for i in 0 ..< 100_000:
  108. discard t.getOrIncl($i & "___" & $i)
  109. for i in 0 ..< 100_000:
  110. assert t.getOrIncl($i & "___" & $i).idToIdx == i + 4
  111. echo "begin"
  112. echo t.vals.len
  113. echo t.vals[0]
  114. echo t.vals[1004]
  115. echo "middle"
  116. var tf: BiTable[float]
  117. discard tf.getOrIncl(0.4)
  118. discard tf.getOrIncl(16.4)
  119. discard tf.getOrIncl(32.4)
  120. echo getKeyId(tf, 32.4)
  121. var f2 = open("testblah.bin", fmWrite)
  122. echo store(f2, tf)
  123. f2.close
  124. var f1 = open("testblah.bin", fmRead)
  125. var t2: BiTable[float]
  126. echo f1.load(t2)
  127. echo t2.vals.len
  128. echo getKeyId(t2, 32.4)
  129. echo "end"
  130. f1.close