packedsets.nim 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## The `packedsets` module implements an efficient `Ordinal` set implemented as a
  10. ## `sparse bit set`:idx:.
  11. ##
  12. ## Supports any Ordinal type.
  13. ##
  14. ## .. note:: Currently the assignment operator `=` for `PackedSet[A]`
  15. ## performs some rather meaningless shallow copy. Since Nim currently does
  16. ## not allow the assignment operator to be overloaded, use the `assign proc
  17. ## <#assign,PackedSet[A],PackedSet[A]>`_ to get a deep copy.
  18. ##
  19. ## See also
  20. ## ========
  21. ## * `sets module <sets.html>`_ for more general hash sets
  22. import std/private/since
  23. import hashes
  24. when defined(nimPreviewSlimSystem):
  25. import std/assertions
  26. type
  27. BitScalar = uint
  28. const
  29. InitIntSetSize = 8 # must be a power of two!
  30. TrunkShift = 9
  31. BitsPerTrunk = 1 shl TrunkShift # needs to be a power of 2 and
  32. # divisible by 64
  33. TrunkMask = BitsPerTrunk - 1
  34. IntsPerTrunk = BitsPerTrunk div (sizeof(BitScalar) * 8)
  35. IntShift = 5 + ord(sizeof(BitScalar) == 8) # 5 or 6, depending on int width
  36. IntMask = 1 shl IntShift - 1
  37. type
  38. Trunk {.acyclic.} = ref object
  39. next: Trunk # all nodes are connected with this pointer
  40. key: int # start address at bit 0
  41. bits: array[0..IntsPerTrunk - 1, BitScalar] # a bit vector
  42. TrunkSeq = seq[Trunk]
  43. PackedSet*[A: Ordinal] = object
  44. ## An efficient set of `Ordinal` types implemented as a sparse bit set.
  45. elems: int # only valid for small numbers
  46. counter, max: int
  47. head: Trunk
  48. data: TrunkSeq
  49. a: array[0..33, int] # profiling shows that 34 elements are enough
  50. proc mustRehash[T](t: T): bool {.inline.} =
  51. let length = t.max + 1
  52. assert length > t.counter
  53. result = (length * 2 < t.counter * 3) or (length - t.counter < 4)
  54. proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} =
  55. const PERTURB_SHIFT = 5
  56. var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT
  57. perturb = cast[Hash](perturb2)
  58. result = ((5 * h) + 1 + perturb) and maxHash
  59. proc packedSetGet[A](t: PackedSet[A], key: int): Trunk =
  60. var h = key and t.max
  61. var perturb = key
  62. while t.data[h] != nil:
  63. if t.data[h].key == key:
  64. return t.data[h]
  65. h = nextTry(h, t.max, perturb)
  66. result = nil
  67. proc intSetRawInsert[A](t: PackedSet[A], data: var TrunkSeq, desc: Trunk) =
  68. var h = desc.key and t.max
  69. var perturb = desc.key
  70. while data[h] != nil:
  71. assert data[h] != desc
  72. h = nextTry(h, t.max, perturb)
  73. assert data[h] == nil
  74. data[h] = desc
  75. proc intSetEnlarge[A](t: var PackedSet[A]) =
  76. var n: TrunkSeq
  77. var oldMax = t.max
  78. t.max = ((t.max + 1) * 2) - 1
  79. newSeq(n, t.max + 1)
  80. for i in countup(0, oldMax):
  81. if t.data[i] != nil: intSetRawInsert(t, n, t.data[i])
  82. swap(t.data, n)
  83. proc intSetPut[A](t: var PackedSet[A], key: int): Trunk =
  84. var h = key and t.max
  85. var perturb = key
  86. while t.data[h] != nil:
  87. if t.data[h].key == key:
  88. return t.data[h]
  89. h = nextTry(h, t.max, perturb)
  90. if mustRehash(t): intSetEnlarge(t)
  91. inc(t.counter)
  92. h = key and t.max
  93. perturb = key
  94. while t.data[h] != nil: h = nextTry(h, t.max, perturb)
  95. assert t.data[h] == nil
  96. new(result)
  97. result.next = t.head
  98. result.key = key
  99. t.head = result
  100. t.data[h] = result
  101. proc bitincl[A](s: var PackedSet[A], key: int) {.inline.} =
  102. var ret: Trunk
  103. var t = intSetPut(s, key shr TrunkShift)
  104. var u = key and TrunkMask
  105. t.bits[u shr IntShift] = t.bits[u shr IntShift] or
  106. (BitScalar(1) shl (u and IntMask))
  107. proc exclImpl[A](s: var PackedSet[A], key: int) =
  108. if s.elems <= s.a.len:
  109. for i in 0..<s.elems:
  110. if s.a[i] == key:
  111. s.a[i] = s.a[s.elems - 1]
  112. dec(s.elems)
  113. return
  114. else:
  115. var t = packedSetGet(s, key shr TrunkShift)
  116. if t != nil:
  117. var u = key and TrunkMask
  118. t.bits[u shr IntShift] = t.bits[u shr IntShift] and
  119. not(BitScalar(1) shl (u and IntMask))
  120. template dollarImpl(): untyped =
  121. result = "{"
  122. for key in items(s):
  123. if result.len > 1: result.add(", ")
  124. result.add $key
  125. result.add("}")
  126. iterator items*[A](s: PackedSet[A]): A {.inline.} =
  127. ## Iterates over any included element of `s`.
  128. if s.elems <= s.a.len:
  129. for i in 0..<s.elems:
  130. yield A(s.a[i])
  131. else:
  132. var r = s.head
  133. while r != nil:
  134. var i = 0
  135. while i <= high(r.bits):
  136. var w: uint = r.bits[i]
  137. # taking a copy of r.bits[i] here is correct, because
  138. # modifying operations are not allowed during traversation
  139. var j = 0
  140. while w != 0: # test all remaining bits for zero
  141. if (w and 1) != 0: # the bit is set!
  142. yield A((r.key shl TrunkShift) or (i shl IntShift +% j))
  143. inc(j)
  144. w = w shr 1
  145. inc(i)
  146. r = r.next
  147. proc initPackedSet*[A]: PackedSet[A] =
  148. ## Returns an empty `PackedSet[A]`.
  149. ## `A` must be `Ordinal`.
  150. ##
  151. ## **See also:**
  152. ## * `toPackedSet proc <#toPackedSet,openArray[A]>`_
  153. runnableExamples:
  154. let a = initPackedSet[int]()
  155. assert len(a) == 0
  156. type Id = distinct int
  157. var ids = initPackedSet[Id]()
  158. ids.incl(3.Id)
  159. result = PackedSet[A](
  160. elems: 0,
  161. counter: 0,
  162. max: 0,
  163. head: nil,
  164. data: @[])
  165. # a: array[0..33, int] # profiling shows that 34 elements are enough
  166. proc contains*[A](s: PackedSet[A], key: A): bool =
  167. ## Returns true if `key` is in `s`.
  168. ##
  169. ## This allows the usage of the `in` operator.
  170. runnableExamples:
  171. type ABCD = enum A, B, C, D
  172. let a = [1, 3, 5].toPackedSet
  173. assert a.contains(3)
  174. assert 3 in a
  175. assert not a.contains(8)
  176. assert 8 notin a
  177. let letters = [A, C].toPackedSet
  178. assert A in letters
  179. assert C in letters
  180. assert B notin letters
  181. if s.elems <= s.a.len:
  182. for i in 0..<s.elems:
  183. if s.a[i] == ord(key): return true
  184. else:
  185. var t = packedSetGet(s, ord(key) shr TrunkShift)
  186. if t != nil:
  187. var u = ord(key) and TrunkMask
  188. result = (t.bits[u shr IntShift] and
  189. (BitScalar(1) shl (u and IntMask))) != 0
  190. else:
  191. result = false
  192. proc incl*[A](s: var PackedSet[A], key: A) =
  193. ## Includes an element `key` in `s`.
  194. ##
  195. ## This doesn't do anything if `key` is already in `s`.
  196. ##
  197. ## **See also:**
  198. ## * `excl proc <#excl,PackedSet[A],A>`_ for excluding an element
  199. ## * `incl proc <#incl,PackedSet[A],PackedSet[A]>`_ for including a set
  200. ## * `containsOrIncl proc <#containsOrIncl,PackedSet[A],A>`_
  201. runnableExamples:
  202. var a = initPackedSet[int]()
  203. a.incl(3)
  204. a.incl(3)
  205. assert len(a) == 1
  206. if s.elems <= s.a.len:
  207. for i in 0..<s.elems:
  208. if s.a[i] == ord(key): return
  209. if s.elems < s.a.len:
  210. s.a[s.elems] = ord(key)
  211. inc(s.elems)
  212. return
  213. newSeq(s.data, InitIntSetSize)
  214. s.max = InitIntSetSize - 1
  215. for i in 0..<s.elems:
  216. bitincl(s, s.a[i])
  217. s.elems = s.a.len + 1
  218. # fall through:
  219. bitincl(s, ord(key))
  220. proc incl*[A](s: var PackedSet[A], other: PackedSet[A]) =
  221. ## Includes all elements from `other` into `s`.
  222. ##
  223. ## This is the in-place version of `s + other <#+,PackedSet[A],PackedSet[A]>`_.
  224. ##
  225. ## **See also:**
  226. ## * `excl proc <#excl,PackedSet[A],PackedSet[A]>`_ for excluding a set
  227. ## * `incl proc <#incl,PackedSet[A],A>`_ for including an element
  228. ## * `containsOrIncl proc <#containsOrIncl,PackedSet[A],A>`_
  229. runnableExamples:
  230. var a = [1].toPackedSet
  231. a.incl([5].toPackedSet)
  232. assert len(a) == 2
  233. assert 5 in a
  234. for item in other.items: incl(s, item)
  235. proc toPackedSet*[A](x: openArray[A]): PackedSet[A] {.since: (1, 3).} =
  236. ## Creates a new `PackedSet[A]` that contains the elements of `x`.
  237. ##
  238. ## Duplicates are removed.
  239. ##
  240. ## **See also:**
  241. ## * `initPackedSet proc <#initPackedSet>`_
  242. runnableExamples:
  243. let a = [5, 6, 7, 8, 8].toPackedSet
  244. assert len(a) == 4
  245. assert $a == "{5, 6, 7, 8}"
  246. result = initPackedSet[A]()
  247. for item in x:
  248. result.incl(item)
  249. proc containsOrIncl*[A](s: var PackedSet[A], key: A): bool =
  250. ## Includes `key` in the set `s` and tells if `key` was already in `s`.
  251. ##
  252. ## The difference with regards to the `incl proc <#incl,PackedSet[A],A>`_ is
  253. ## that this proc returns true if `s` already contained `key`. The
  254. ## proc will return false if `key` was added as a new value to `s` during
  255. ## this call.
  256. ##
  257. ## **See also:**
  258. ## * `incl proc <#incl,PackedSet[A],A>`_ for including an element
  259. ## * `missingOrExcl proc <#missingOrExcl,PackedSet[A],A>`_
  260. runnableExamples:
  261. var a = initPackedSet[int]()
  262. assert a.containsOrIncl(3) == false
  263. assert a.containsOrIncl(3) == true
  264. assert a.containsOrIncl(4) == false
  265. if s.elems <= s.a.len:
  266. for i in 0..<s.elems:
  267. if s.a[i] == ord(key):
  268. return true
  269. incl(s, key)
  270. result = false
  271. else:
  272. var t = packedSetGet(s, ord(key) shr TrunkShift)
  273. if t != nil:
  274. var u = ord(key) and TrunkMask
  275. result = (t.bits[u shr IntShift] and BitScalar(1) shl (u and IntMask)) != 0
  276. if not result:
  277. t.bits[u shr IntShift] = t.bits[u shr IntShift] or
  278. (BitScalar(1) shl (u and IntMask))
  279. else:
  280. incl(s, key)
  281. result = false
  282. proc excl*[A](s: var PackedSet[A], key: A) =
  283. ## Excludes `key` from the set `s`.
  284. ##
  285. ## This doesn't do anything if `key` is not found in `s`.
  286. ##
  287. ## **See also:**
  288. ## * `incl proc <#incl,PackedSet[A],A>`_ for including an element
  289. ## * `excl proc <#excl,PackedSet[A],PackedSet[A]>`_ for excluding a set
  290. ## * `missingOrExcl proc <#missingOrExcl,PackedSet[A],A>`_
  291. runnableExamples:
  292. var a = [3].toPackedSet
  293. a.excl(3)
  294. a.excl(3)
  295. a.excl(99)
  296. assert len(a) == 0
  297. exclImpl[A](s, ord(key))
  298. proc excl*[A](s: var PackedSet[A], other: PackedSet[A]) =
  299. ## Excludes all elements from `other` from `s`.
  300. ##
  301. ## This is the in-place version of `s - other <#-,PackedSet[A],PackedSet[A]>`_.
  302. ##
  303. ## **See also:**
  304. ## * `incl proc <#incl,PackedSet[A],PackedSet[A]>`_ for including a set
  305. ## * `excl proc <#excl,PackedSet[A],A>`_ for excluding an element
  306. ## * `missingOrExcl proc <#missingOrExcl,PackedSet[A],A>`_
  307. runnableExamples:
  308. var a = [1, 5].toPackedSet
  309. a.excl([5].toPackedSet)
  310. assert len(a) == 1
  311. assert 5 notin a
  312. for item in other.items:
  313. excl(s, item)
  314. proc len*[A](s: PackedSet[A]): int {.inline.} =
  315. ## Returns the number of elements in `s`.
  316. runnableExamples:
  317. let a = [1, 3, 5].toPackedSet
  318. assert len(a) == 3
  319. if s.elems < s.a.len:
  320. result = s.elems
  321. else:
  322. result = 0
  323. for _ in s.items:
  324. # pending bug #11167; when fixed, check each explicit `items` to see if it can be removed
  325. inc(result)
  326. proc missingOrExcl*[A](s: var PackedSet[A], key: A): bool =
  327. ## Excludes `key` from the set `s` and tells if `key` was already missing from `s`.
  328. ##
  329. ## The difference with regards to the `excl proc <#excl,PackedSet[A],A>`_ is
  330. ## that this proc returns true if `key` was missing from `s`.
  331. ## The proc will return false if `key` was in `s` and it was removed
  332. ## during this call.
  333. ##
  334. ## **See also:**
  335. ## * `excl proc <#excl,PackedSet[A],A>`_ for excluding an element
  336. ## * `excl proc <#excl,PackedSet[A],PackedSet[A]>`_ for excluding a set
  337. ## * `containsOrIncl proc <#containsOrIncl,PackedSet[A],A>`_
  338. runnableExamples:
  339. var a = [5].toPackedSet
  340. assert a.missingOrExcl(5) == false
  341. assert a.missingOrExcl(5) == true
  342. var count = s.len
  343. exclImpl(s, ord(key))
  344. result = count == s.len
  345. proc clear*[A](result: var PackedSet[A]) =
  346. ## Clears the `PackedSet[A]` back to an empty state.
  347. runnableExamples:
  348. var a = [5, 7].toPackedSet
  349. clear(a)
  350. assert len(a) == 0
  351. # setLen(result.data, InitIntSetSize)
  352. # for i in 0..InitIntSetSize - 1: result.data[i] = nil
  353. # result.max = InitIntSetSize - 1
  354. result.data = @[]
  355. result.max = 0
  356. result.counter = 0
  357. result.head = nil
  358. result.elems = 0
  359. proc isNil*[A](x: PackedSet[A]): bool {.inline.} =
  360. ## Returns true if `x` is empty, false otherwise.
  361. runnableExamples:
  362. var a = initPackedSet[int]()
  363. assert a.isNil
  364. a.incl(2)
  365. assert not a.isNil
  366. a.excl(2)
  367. assert a.isNil
  368. x.head.isNil and x.elems == 0
  369. proc assign*[A](dest: var PackedSet[A], src: PackedSet[A]) =
  370. ## Copies `src` to `dest`.
  371. ## `dest` does not need to be initialized by the `initPackedSet proc <#initPackedSet>`_.
  372. runnableExamples:
  373. var
  374. a = initPackedSet[int]()
  375. b = initPackedSet[int]()
  376. b.incl(5)
  377. b.incl(7)
  378. a.assign(b)
  379. assert len(a) == 2
  380. if src.elems <= src.a.len:
  381. dest.data = @[]
  382. dest.max = 0
  383. dest.counter = src.counter
  384. dest.head = nil
  385. dest.elems = src.elems
  386. dest.a = src.a
  387. else:
  388. dest.counter = src.counter
  389. dest.max = src.max
  390. dest.elems = src.elems
  391. newSeq(dest.data, src.data.len)
  392. var it = src.head
  393. while it != nil:
  394. var h = it.key and dest.max
  395. var perturb = it.key
  396. while dest.data[h] != nil: h = nextTry(h, dest.max, perturb)
  397. assert dest.data[h] == nil
  398. var n: Trunk
  399. new(n)
  400. n.next = dest.head
  401. n.key = it.key
  402. n.bits = it.bits
  403. dest.head = n
  404. dest.data[h] = n
  405. it = it.next
  406. proc union*[A](s1, s2: PackedSet[A]): PackedSet[A] =
  407. ## Returns the union of the sets `s1` and `s2`.
  408. ##
  409. ## The same as `s1 + s2 <#+,PackedSet[A],PackedSet[A]>`_.
  410. runnableExamples:
  411. let
  412. a = [1, 2, 3].toPackedSet
  413. b = [3, 4, 5].toPackedSet
  414. c = union(a, b)
  415. assert c.len == 5
  416. assert c == [1, 2, 3, 4, 5].toPackedSet
  417. result.assign(s1)
  418. incl(result, s2)
  419. proc intersection*[A](s1, s2: PackedSet[A]): PackedSet[A] =
  420. ## Returns the intersection of the sets `s1` and `s2`.
  421. ##
  422. ## The same as `s1 * s2 <#*,PackedSet[A],PackedSet[A]>`_.
  423. runnableExamples:
  424. let
  425. a = [1, 2, 3].toPackedSet
  426. b = [3, 4, 5].toPackedSet
  427. c = intersection(a, b)
  428. assert c.len == 1
  429. assert c == [3].toPackedSet
  430. result = initPackedSet[A]()
  431. for item in s1.items:
  432. if contains(s2, item):
  433. incl(result, item)
  434. proc difference*[A](s1, s2: PackedSet[A]): PackedSet[A] =
  435. ## Returns the difference of the sets `s1` and `s2`.
  436. ##
  437. ## The same as `s1 - s2 <#-,PackedSet[A],PackedSet[A]>`_.
  438. runnableExamples:
  439. let
  440. a = [1, 2, 3].toPackedSet
  441. b = [3, 4, 5].toPackedSet
  442. c = difference(a, b)
  443. assert c.len == 2
  444. assert c == [1, 2].toPackedSet
  445. result = initPackedSet[A]()
  446. for item in s1.items:
  447. if not contains(s2, item):
  448. incl(result, item)
  449. proc symmetricDifference*[A](s1, s2: PackedSet[A]): PackedSet[A] =
  450. ## Returns the symmetric difference of the sets `s1` and `s2`.
  451. runnableExamples:
  452. let
  453. a = [1, 2, 3].toPackedSet
  454. b = [3, 4, 5].toPackedSet
  455. c = symmetricDifference(a, b)
  456. assert c.len == 4
  457. assert c == [1, 2, 4, 5].toPackedSet
  458. result.assign(s1)
  459. for item in s2.items:
  460. if containsOrIncl(result, item):
  461. excl(result, item)
  462. proc `+`*[A](s1, s2: PackedSet[A]): PackedSet[A] {.inline.} =
  463. ## Alias for `union(s1, s2) <#union,PackedSet[A],PackedSet[A]>`_.
  464. result = union(s1, s2)
  465. proc `*`*[A](s1, s2: PackedSet[A]): PackedSet[A] {.inline.} =
  466. ## Alias for `intersection(s1, s2) <#intersection,PackedSet[A],PackedSet[A]>`_.
  467. result = intersection(s1, s2)
  468. proc `-`*[A](s1, s2: PackedSet[A]): PackedSet[A] {.inline.} =
  469. ## Alias for `difference(s1, s2) <#difference,PackedSet[A],PackedSet[A]>`_.
  470. result = difference(s1, s2)
  471. proc disjoint*[A](s1, s2: PackedSet[A]): bool =
  472. ## Returns true if the sets `s1` and `s2` have no items in common.
  473. runnableExamples:
  474. let
  475. a = [1, 2].toPackedSet
  476. b = [2, 3].toPackedSet
  477. c = [3, 4].toPackedSet
  478. assert disjoint(a, b) == false
  479. assert disjoint(a, c) == true
  480. for item in s1.items:
  481. if contains(s2, item):
  482. return false
  483. return true
  484. proc card*[A](s: PackedSet[A]): int {.inline.} =
  485. ## Alias for `len() <#len,PackedSet[A]>`_.
  486. ##
  487. ## Card stands for the [cardinality](http://en.wikipedia.org/wiki/Cardinality)
  488. ## of a set.
  489. result = s.len()
  490. proc `<=`*[A](s1, s2: PackedSet[A]): bool =
  491. ## Returns true if `s1` is a subset of `s2`.
  492. ##
  493. ## A subset `s1` has all of its elements in `s2`, but `s2` doesn't necessarily
  494. ## have more elements than `s1`. That is, `s1` can be equal to `s2`.
  495. runnableExamples:
  496. let
  497. a = [1].toPackedSet
  498. b = [1, 2].toPackedSet
  499. c = [1, 3].toPackedSet
  500. assert a <= b
  501. assert b <= b
  502. assert not (c <= b)
  503. for item in s1.items:
  504. if not s2.contains(item):
  505. return false
  506. return true
  507. proc `<`*[A](s1, s2: PackedSet[A]): bool =
  508. ## Returns true if `s1` is a proper subset of `s2`.
  509. ##
  510. ## A strict or proper subset `s1` has all of its elements in `s2`, but `s2` has
  511. ## more elements than `s1`.
  512. runnableExamples:
  513. let
  514. a = [1].toPackedSet
  515. b = [1, 2].toPackedSet
  516. c = [1, 3].toPackedSet
  517. assert a < b
  518. assert not (b < b)
  519. assert not (c < b)
  520. return s1 <= s2 and not (s2 <= s1)
  521. proc `==`*[A](s1, s2: PackedSet[A]): bool =
  522. ## Returns true if both `s1` and `s2` have the same elements and set size.
  523. runnableExamples:
  524. assert [1, 2].toPackedSet == [2, 1].toPackedSet
  525. assert [1, 2].toPackedSet == [2, 1, 2].toPackedSet
  526. return s1 <= s2 and s2 <= s1
  527. proc `$`*[A](s: PackedSet[A]): string =
  528. ## Converts `s` to a string.
  529. runnableExamples:
  530. let a = [1, 2, 3].toPackedSet
  531. assert $a == "{1, 2, 3}"
  532. dollarImpl()