# bitops.nim -- Nim standard library bit-manipulation module.
#
#
#            Nim's Runtime Library
#        (c) Copyright 2017 Nim Authors
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module implements a series of low level methods for bit manipulation.
##
## By default, compiler intrinsics are used where possible to improve performance
## on supported compilers: `GCC`, `LLVM_GCC`, `CLANG`, `VCC`, `ICC`.
##
## The module will fallback to pure nim procs in case the backend is not supported.
## You can also use the flag `noIntrinsicsBitOpts` to disable compiler intrinsics.
##
## This module is also compatible with other backends: `JavaScript`, `NimScript`
## as well as the `compiletime VM`.
##
## As a result of using optimized functions/intrinsics, some functions can return
## undefined results if the input is invalid. You can use the flag `noUndefinedBitOpts`
## to force predictable behaviour for all input, causing a small performance hit.
##
## At this time only `fastLog2`, `firstSetBit`, `countLeadingZeroBits` and `countTrailingZeroBits`
## may return undefined and/or platform dependent values if given invalid input.
  26. import macros
  27. import std/private/since
  28. from std/private/bitops_utils import forwardImpl, castToUnsigned
  29. func bitnot*[T: SomeInteger](x: T): T {.magic: "BitnotI".}
  30. ## Computes the `bitwise complement` of the integer `x`.
  31. func internalBitand[T: SomeInteger](x, y: T): T {.magic: "BitandI".}
  32. func internalBitor[T: SomeInteger](x, y: T): T {.magic: "BitorI".}
  33. func internalBitxor[T: SomeInteger](x, y: T): T {.magic: "BitxorI".}
  34. macro bitand*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  35. ## Computes the `bitwise and` of all arguments collectively.
  36. let fn = bindSym("internalBitand")
  37. result = newCall(fn, x, y)
  38. for extra in z:
  39. result = newCall(fn, result, extra)
  40. macro bitor*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  41. ## Computes the `bitwise or` of all arguments collectively.
  42. let fn = bindSym("internalBitor")
  43. result = newCall(fn, x, y)
  44. for extra in z:
  45. result = newCall(fn, result, extra)
  46. macro bitxor*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  47. ## Computes the `bitwise xor` of all arguments collectively.
  48. let fn = bindSym("internalBitxor")
  49. result = newCall(fn, x, y)
  50. for extra in z:
  51. result = newCall(fn, result, extra)
type BitsRange*[T] = range[0..sizeof(T)*8-1]
  ## A range with all bit positions for type `T`.
  ## E.g. `BitsRange[uint8]` is `range[0..7]`.
  54. func bitsliced*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  55. ## Returns an extracted (and shifted) slice of bits from `v`.
  56. runnableExamples:
  57. doAssert 0b10111.bitsliced(2 .. 4) == 0b101
  58. doAssert 0b11100.bitsliced(0 .. 2) == 0b100
  59. doAssert 0b11100.bitsliced(0 ..< 3) == 0b100
  60. let
  61. upmost = sizeof(T) * 8 - 1
  62. uv = v.castToUnsigned
  63. (uv shl (upmost - slice.b) shr (upmost - slice.b + slice.a)).T
  64. proc bitslice*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  65. ## Mutates `v` into an extracted (and shifted) slice of bits from `v`.
  66. runnableExamples:
  67. var x = 0b101110
  68. x.bitslice(2 .. 4)
  69. doAssert x == 0b011
  70. let
  71. upmost = sizeof(T) * 8 - 1
  72. uv = v.castToUnsigned
  73. v = (uv shl (upmost - slice.b) shr (upmost - slice.b + slice.a)).T
  74. func toMask*[T: SomeInteger](slice: Slice[int]): T {.inline, since: (1, 3).} =
  75. ## Creates a bitmask based on a slice of bits.
  76. runnableExamples:
  77. doAssert toMask[int32](1 .. 3) == 0b1110'i32
  78. doAssert toMask[int32](0 .. 3) == 0b1111'i32
  79. let
  80. upmost = sizeof(T) * 8 - 1
  81. bitmask = bitnot(0.T).castToUnsigned
  82. (bitmask shl (upmost - slice.b + slice.a) shr (upmost - slice.b)).T
  83. proc masked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
  84. ## Returns `v`, with only the `1` bits from `mask` matching those of
  85. ## `v` set to 1.
  86. ##
  87. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  88. runnableExamples:
  89. let v = 0b0000_0011'u8
  90. doAssert v.masked(0b0000_1010'u8) == 0b0000_0010'u8
  91. bitand(v, mask)
  92. func masked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  93. ## Returns `v`, with only the `1` bits in the range of `slice`
  94. ## matching those of `v` set to 1.
  95. ##
  96. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  97. runnableExamples:
  98. let v = 0b0000_1011'u8
  99. doAssert v.masked(1 .. 3) == 0b0000_1010'u8
  100. bitand(v, toMask[T](slice))
  101. proc mask*[T: SomeInteger](v: var T; mask: T) {.inline, since: (1, 3).} =
  102. ## Mutates `v`, with only the `1` bits from `mask` matching those of
  103. ## `v` set to 1.
  104. ##
  105. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  106. runnableExamples:
  107. var v = 0b0000_0011'u8
  108. v.mask(0b0000_1010'u8)
  109. doAssert v == 0b0000_0010'u8
  110. v = bitand(v, mask)
  111. proc mask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  112. ## Mutates `v`, with only the `1` bits in the range of `slice`
  113. ## matching those of `v` set to 1.
  114. ##
  115. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  116. runnableExamples:
  117. var v = 0b0000_1011'u8
  118. v.mask(1 .. 3)
  119. doAssert v == 0b0000_1010'u8
  120. v = bitand(v, toMask[T](slice))
  121. func setMasked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
  122. ## Returns `v`, with all the `1` bits from `mask` set to 1.
  123. ##
  124. ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  125. runnableExamples:
  126. let v = 0b0000_0011'u8
  127. doAssert v.setMasked(0b0000_1010'u8) == 0b0000_1011'u8
  128. bitor(v, mask)
  129. func setMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  130. ## Returns `v`, with all the `1` bits in the range of `slice` set to 1.
  131. ##
  132. ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  133. runnableExamples:
  134. let v = 0b0000_0011'u8
  135. doAssert v.setMasked(2 .. 3) == 0b0000_1111'u8
  136. bitor(v, toMask[T](slice))
  137. proc setMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  138. ## Mutates `v`, with all the `1` bits from `mask` set to 1.
  139. ##
  140. ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  141. runnableExamples:
  142. var v = 0b0000_0011'u8
  143. v.setMask(0b0000_1010'u8)
  144. doAssert v == 0b0000_1011'u8
  145. v = bitor(v, mask)
  146. proc setMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  147. ## Mutates `v`, with all the `1` bits in the range of `slice` set to 1.
  148. ##
  149. ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  150. runnableExamples:
  151. var v = 0b0000_0011'u8
  152. v.setMask(2 .. 3)
  153. doAssert v == 0b0000_1111'u8
  154. v = bitor(v, toMask[T](slice))
  155. func clearMasked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
  156. ## Returns `v`, with all the `1` bits from `mask` set to 0.
  157. ##
  158. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  159. ## with an *inverted mask*.
  160. runnableExamples:
  161. let v = 0b0000_0011'u8
  162. doAssert v.clearMasked(0b0000_1010'u8) == 0b0000_0001'u8
  163. bitand(v, bitnot(mask))
  164. func clearMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  165. ## Returns `v`, with all the `1` bits in the range of `slice` set to 0.
  166. ##
  167. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  168. ## with an *inverted mask*.
  169. runnableExamples:
  170. let v = 0b0000_0011'u8
  171. doAssert v.clearMasked(1 .. 3) == 0b0000_0001'u8
  172. bitand(v, bitnot(toMask[T](slice)))
  173. proc clearMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  174. ## Mutates `v`, with all the `1` bits from `mask` set to 0.
  175. ##
  176. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  177. ## with an *inverted mask*.
  178. runnableExamples:
  179. var v = 0b0000_0011'u8
  180. v.clearMask(0b0000_1010'u8)
  181. doAssert v == 0b0000_0001'u8
  182. v = bitand(v, bitnot(mask))
  183. proc clearMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  184. ## Mutates `v`, with all the `1` bits in the range of `slice` set to 0.
  185. ##
  186. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  187. ## with an *inverted mask*.
  188. runnableExamples:
  189. var v = 0b0000_0011'u8
  190. v.clearMask(1 .. 3)
  191. doAssert v == 0b0000_0001'u8
  192. v = bitand(v, bitnot(toMask[T](slice)))
  193. func flipMasked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
  194. ## Returns `v`, with all the `1` bits from `mask` flipped.
  195. ##
  196. ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  197. runnableExamples:
  198. let v = 0b0000_0011'u8
  199. doAssert v.flipMasked(0b0000_1010'u8) == 0b0000_1001'u8
  200. bitxor(v, mask)
  201. func flipMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  202. ## Returns `v`, with all the `1` bits in the range of `slice` flipped.
  203. ##
  204. ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  205. runnableExamples:
  206. let v = 0b0000_0011'u8
  207. doAssert v.flipMasked(1 .. 3) == 0b0000_1101'u8
  208. bitxor(v, toMask[T](slice))
  209. proc flipMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  210. ## Mutates `v`, with all the `1` bits from `mask` flipped.
  211. ##
  212. ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  213. runnableExamples:
  214. var v = 0b0000_0011'u8
  215. v.flipMask(0b0000_1010'u8)
  216. doAssert v == 0b0000_1001'u8
  217. v = bitxor(v, mask)
  218. proc flipMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  219. ## Mutates `v`, with all the `1` bits in the range of `slice` flipped.
  220. ##
  221. ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  222. runnableExamples:
  223. var v = 0b0000_0011'u8
  224. v.flipMask(1 .. 3)
  225. doAssert v == 0b0000_1101'u8
  226. v = bitxor(v, toMask[T](slice))
  227. proc setBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  228. ## Mutates `v`, with the bit at position `bit` set to 1.
  229. runnableExamples:
  230. var v = 0b0000_0011'u8
  231. v.setBit(5'u8)
  232. doAssert v == 0b0010_0011'u8
  233. v.setMask(1.T shl bit)
  234. proc clearBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  235. ## Mutates `v`, with the bit at position `bit` set to 0.
  236. runnableExamples:
  237. var v = 0b0000_0011'u8
  238. v.clearBit(1'u8)
  239. doAssert v == 0b0000_0001'u8
  240. v.clearMask(1.T shl bit)
  241. proc flipBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  242. ## Mutates `v`, with the bit at position `bit` flipped.
  243. runnableExamples:
  244. var v = 0b0000_0011'u8
  245. v.flipBit(1'u8)
  246. doAssert v == 0b0000_0001'u8
  247. v = 0b0000_0011'u8
  248. v.flipBit(2'u8)
  249. doAssert v == 0b0000_0111'u8
  250. v.flipMask(1.T shl bit)
  251. macro setBits*(v: typed; bits: varargs[typed]): untyped =
  252. ## Mutates `v`, with the bits at positions `bits` set to 1.
  253. runnableExamples:
  254. var v = 0b0000_0011'u8
  255. v.setBits(3, 5, 7)
  256. doAssert v == 0b1010_1011'u8
  257. bits.expectKind(nnkBracket)
  258. result = newStmtList()
  259. for bit in bits:
  260. result.add newCall("setBit", v, bit)
  261. macro clearBits*(v: typed; bits: varargs[typed]): untyped =
  262. ## Mutates `v`, with the bits at positions `bits` set to 0.
  263. runnableExamples:
  264. var v = 0b1111_1111'u8
  265. v.clearBits(1, 3, 5, 7)
  266. doAssert v == 0b0101_0101'u8
  267. bits.expectKind(nnkBracket)
  268. result = newStmtList()
  269. for bit in bits:
  270. result.add newCall("clearBit", v, bit)
  271. macro flipBits*(v: typed; bits: varargs[typed]): untyped =
  272. ## Mutates `v`, with the bits at positions `bits` set to 0.
  273. runnableExamples:
  274. var v = 0b0000_1111'u8
  275. v.flipBits(1, 3, 5, 7)
  276. doAssert v == 0b1010_0101'u8
  277. bits.expectKind(nnkBracket)
  278. result = newStmtList()
  279. for bit in bits:
  280. result.add newCall("flipBit", v, bit)
  281. proc testBit*[T: SomeInteger](v: T; bit: BitsRange[T]): bool {.inline.} =
  282. ## Returns true if the bit in `v` at positions `bit` is set to 1.
  283. runnableExamples:
  284. let v = 0b0000_1111'u8
  285. doAssert v.testBit(0)
  286. doAssert not v.testBit(7)
  287. let mask = 1.T shl bit
  288. return (v and mask) == mask
  289. # #### Pure Nim version ####
  290. func firstSetBitNim(x: uint32): int {.inline.} =
  291. ## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero.
  292. # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup
  293. const lookup: array[32, uint8] = [0'u8, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15,
  294. 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9]
  295. let v = x.uint32
  296. let k = not v + 1 # get two's complement # cast[uint32](-cast[int32](v))
  297. result = 1 + lookup[uint32((v and k) * 0x077CB531'u32) shr 27].int
  298. func firstSetBitNim(x: uint64): int {.inline.} =
  299. ## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero.
  300. # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup
  301. let v = uint64(x)
  302. var k = uint32(v and 0xFFFFFFFF'u32)
  303. if k == 0:
  304. k = uint32(v shr 32'u32) and 0xFFFFFFFF'u32
  305. result = 32
  306. else:
  307. result = 0
  308. result += firstSetBitNim(k)
  309. func fastlog2Nim(x: uint32): int {.inline.} =
  310. ## Quickly find the log base 2 of a 32-bit or less integer.
  311. # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
  312. # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers
  313. const lookup: array[32, uint8] = [0'u8, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18,
  314. 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31]
  315. var v = x.uint32
  316. v = v or v shr 1 # first round down to one less than a power of 2
  317. v = v or v shr 2
  318. v = v or v shr 4
  319. v = v or v shr 8
  320. v = v or v shr 16
  321. result = lookup[uint32(v * 0x07C4ACDD'u32) shr 27].int
  322. func fastlog2Nim(x: uint64): int {.inline.} =
  323. ## Quickly find the log base 2 of a 64-bit integer.
  324. # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
  325. # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers
  326. const lookup: array[64, uint8] = [0'u8, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54,
  327. 33, 42, 3, 61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4, 62,
  328. 57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21, 56, 45, 25, 31,
  329. 35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5, 63]
  330. var v = x.uint64
  331. v = v or v shr 1 # first round down to one less than a power of 2
  332. v = v or v shr 2
  333. v = v or v shr 4
  334. v = v or v shr 8
  335. v = v or v shr 16
  336. v = v or v shr 32
  337. result = lookup[(v * 0x03F6EAF2CD271461'u64) shr 58].int
import system/countbits_impl

# Gate for the rotate intrinsics declared near the end of this module:
# only x86/amd64 targets with a compiler whose rotate builtins were verified.
# `useBuiltins` comes from system/countbits_impl -- presumably false when
# -d:noIntrinsicsBitOpts is set (see the module docs); confirm there.
const useBuiltinsRotate = (defined(amd64) or defined(i386)) and
                          (defined(gcc) or defined(clang) or defined(vcc) or
                           (defined(icl) and not defined(cpp))) and useBuiltins
  342. template parityImpl[T](value: T): int =
  343. # formula id from: https://graphics.stanford.edu/%7Eseander/bithacks.html#ParityParallel
  344. var v = value
  345. when sizeof(T) == 8:
  346. v = v xor (v shr 32)
  347. when sizeof(T) >= 4:
  348. v = v xor (v shr 16)
  349. when sizeof(T) >= 2:
  350. v = v xor (v shr 8)
  351. v = v xor (v shr 4)
  352. v = v and 0xf
  353. ((0x6996'u shr v) and 1).int
when useGCC_builtins:
  # GCC/Clang compiler builtins (resolved by the C backend, no header needed).
  # Returns the bit parity in value
  proc builtin_parity(x: cuint): cint {.importc: "__builtin_parity", cdecl.}
  proc builtin_parityll(x: culonglong): cint {.importc: "__builtin_parityll", cdecl.}
  # Returns one plus the index of the least significant 1-bit of x, or if x is zero, returns zero.
  proc builtin_ffs(x: cint): cint {.importc: "__builtin_ffs", cdecl.}
  proc builtin_ffsll(x: clonglong): cint {.importc: "__builtin_ffsll", cdecl.}
  # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.
  proc builtin_clz(x: cuint): cint {.importc: "__builtin_clz", cdecl.}
  proc builtin_clzll(x: culonglong): cint {.importc: "__builtin_clzll", cdecl.}
  # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.
  proc builtin_ctz(x: cuint): cint {.importc: "__builtin_ctz", cdecl.}
  proc builtin_ctzll(x: culonglong): cint {.importc: "__builtin_ctzll", cdecl.}
elif useVCC_builtins:
  # MSVC _BitScan* intrinsics.
  # Search the mask data from most significant bit (MSB) to least significant bit (LSB) for a set bit (1).
  func bitScanReverse(index: ptr culong, mask: culong): uint8 {.
    importc: "_BitScanReverse", header: "<intrin.h>".}
  func bitScanReverse64(index: ptr culong, mask: uint64): uint8 {.
    importc: "_BitScanReverse64", header: "<intrin.h>".}
  # Search the mask data from least significant bit (LSB) to the most significant bit (MSB) for a set bit (1).
  func bitScanForward(index: ptr culong, mask: culong): uint8 {.
    importc: "_BitScanForward", header: "<intrin.h>".}
  func bitScanForward64(index: ptr culong, mask: uint64): uint8 {.
    importc: "_BitScanForward64", header: "<intrin.h>".}
  # Wraps the out-parameter style of _BitScan*: the index is written through a
  # pointer and the found/not-found flag is discarded (callers guard zero input).
  template vcc_scan_impl(fnc: untyped; v: untyped): int =
    var index: culong
    discard fnc(index.addr, v)
    index.int
elif useICC_builtins:
  # Intel classic compiler _BitScan* intrinsics.
  # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.
  func bitScanForward(p: ptr uint32, b: uint32): uint8 {.
    importc: "_BitScanForward", header: "<immintrin.h>".}
  func bitScanForward64(p: ptr uint32, b: uint64): uint8 {.
    importc: "_BitScanForward64", header: "<immintrin.h>".}
  # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.
  func bitScanReverse(p: ptr uint32, b: uint32): uint8 {.
    importc: "_BitScanReverse", header: "<immintrin.h>".}
  func bitScanReverse64(p: ptr uint32, b: uint64): uint8 {.
    importc: "_BitScanReverse64", header: "<immintrin.h>".}
  # Same out-parameter wrapper as the VCC variant, with a uint32 index.
  template icc_scan_impl(fnc: untyped; v: untyped): int =
    var index: uint32
    discard fnc(index.addr, v)
    index.int
func countSetBits*(x: SomeInteger): int {.inline.} =
  ## Counts the set bits in an integer (also called `Hamming weight`:idx:).
  runnableExamples:
    doAssert countSetBits(0b0000_0011'u8) == 2
    doAssert countSetBits(0b1010_1010'u8) == 4
  # Delegates to system/countbits_impl (imported above); which backend
  # (intrinsic vs. pure Nim) is chosen lives there, not in this module.
  result = countSetBitsImpl(x)
  403. func popcount*(x: SomeInteger): int {.inline.} =
  404. ## Alias for `countSetBits <#countSetBits,SomeInteger>`_ (Hamming weight).
  405. result = countSetBits(x)
func parityBits*(x: SomeInteger): int {.inline.} =
  ## Calculate the bit parity in an integer. If the number of 1-bits
  ## is odd, the parity is 1, otherwise 0.
  runnableExamples:
    doAssert parityBits(0b0000_0000'u8) == 0
    doAssert parityBits(0b0101_0001'u8) == 1
    doAssert parityBits(0b0110_1001'u8) == 0
    doAssert parityBits(0b0111_1111'u8) == 1
  # Can be used a base if creating ASM version.
  # https://stackoverflow.com/questions/21617970/how-to-check-if-value-has-even-parity-of-bits-or-odd
  let x = x.castToUnsigned  # shadow: operate on the unsigned bit pattern
  when nimvm:
    # Compile-time VM path: C intrinsics are unavailable, use the pure-Nim fold.
    result = forwardImpl(parityImpl, x)
  else:
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_parity(x.uint32).int
      else: result = builtin_parityll(x.uint64).int
    else:
      when sizeof(x) <= 4: result = parityImpl(x.uint32)
      else: result = parityImpl(x.uint64)
func firstSetBit*(x: SomeInteger): int {.inline.} =
  ## Returns the 1-based index of the least significant set bit of `x`.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0,
  ## otherwise the result is undefined.
  runnableExamples:
    doAssert firstSetBit(0b0000_0001'u8) == 1
    doAssert firstSetBit(0b0000_0010'u8) == 2
    doAssert firstSetBit(0b0000_0100'u8) == 3
    doAssert firstSetBit(0b0000_1000'u8) == 4
    doAssert firstSetBit(0b0000_1111'u8) == 1
  # GCC builtin 'builtin_ffs' already handle zero input.
  let x = x.castToUnsigned  # shadow: operate on the unsigned bit pattern
  when nimvm:
    # Compile-time VM path: guard zero explicitly, then use the pure-Nim helper.
    when noUndefined:
      if x == 0:
        return 0
    result = forwardImpl(firstSetBitNim, x)
  else:
    # Zero guard is skipped for GCC: __builtin_ffs itself returns 0 for 0.
    when noUndefined and not useGCC_builtins:
      if x == 0:
        return 0
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_ffs(cast[cint](x.cuint)).int
      else: result = builtin_ffsll(cast[clonglong](x.culonglong)).int
    elif useVCC_builtins:
      # _BitScanForward yields a 0-based index; +1 converts to this proc's 1-based result.
      when sizeof(x) <= 4:
        result = 1 + vcc_scan_impl(bitScanForward, x.culong)
      elif arch64:
        result = 1 + vcc_scan_impl(bitScanForward64, x.uint64)
      else:
        result = firstSetBitNim(x.uint64)
    elif useICC_builtins:
      when sizeof(x) <= 4:
        result = 1 + icc_scan_impl(bitScanForward, x.uint32)
      elif arch64:
        result = 1 + icc_scan_impl(bitScanForward64, x.uint64)
      else:
        result = firstSetBitNim(x.uint64)
    else:
      when sizeof(x) <= 4: result = firstSetBitNim(x.uint32)
      else: result = firstSetBitNim(x.uint64)
func fastLog2*(x: SomeInteger): int {.inline.} =
  ## Quickly find the log base 2 of an integer.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is -1,
  ## otherwise the result is undefined.
  runnableExamples:
    doAssert fastLog2(0b0000_0001'u8) == 0
    doAssert fastLog2(0b0000_0010'u8) == 1
    doAssert fastLog2(0b0000_0100'u8) == 2
    doAssert fastLog2(0b0000_1000'u8) == 3
    doAssert fastLog2(0b0000_1111'u8) == 3
  let x = x.castToUnsigned  # shadow: operate on the unsigned bit pattern
  when noUndefined:
    if x == 0:
      return -1
  when nimvm:
    # Compile-time VM path: intrinsics unavailable, use the pure-Nim de Bruijn helper.
    result = forwardImpl(fastlog2Nim, x)
  else:
    when useGCC_builtins:
      # clz counts from the MSB of the full 32/64-bit word: log2 = width-1 - clz.
      when sizeof(x) <= 4: result = 31 - builtin_clz(x.uint32).int
      else: result = 63 - builtin_clzll(x.uint64).int
    elif useVCC_builtins:
      # _BitScanReverse reports the 0-based index of the highest set bit, i.e. log2.
      when sizeof(x) <= 4:
        result = vcc_scan_impl(bitScanReverse, x.culong)
      elif arch64:
        result = vcc_scan_impl(bitScanReverse64, x.uint64)
      else:
        result = fastlog2Nim(x.uint64)
    elif useICC_builtins:
      when sizeof(x) <= 4:
        result = icc_scan_impl(bitScanReverse, x.uint32)
      elif arch64:
        result = icc_scan_impl(bitScanReverse64, x.uint64)
      else:
        result = fastlog2Nim(x.uint64)
    else:
      when sizeof(x) <= 4: result = fastlog2Nim(x.uint32)
      else: result = fastlog2Nim(x.uint64)
func countLeadingZeroBits*(x: SomeInteger): int {.inline.} =
  ## Returns the number of leading zero bits in an integer.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0,
  ## otherwise the result is undefined.
  ##
  ## **See also:**
  ## * `countTrailingZeroBits proc <#countTrailingZeroBits,SomeInteger>`_
  runnableExamples:
    doAssert countLeadingZeroBits(0b0000_0001'u8) == 7
    doAssert countLeadingZeroBits(0b0000_0010'u8) == 6
    doAssert countLeadingZeroBits(0b0000_0100'u8) == 5
    doAssert countLeadingZeroBits(0b0000_1000'u8) == 4
    doAssert countLeadingZeroBits(0b0000_1111'u8) == 4
  let x = x.castToUnsigned  # shadow: operate on the unsigned bit pattern
  when noUndefined:
    if x == 0:
      return 0
  when nimvm:
    # clz == (width - 1) - floor(log2(x)).
    result = sizeof(x)*8 - 1 - forwardImpl(fastlog2Nim, x)
  else:
    when useGCC_builtins:
      # builtin_clz counts within a full 32-bit word; subtract the padding
      # bits for narrower types (e.g. 24 for uint8).
      when sizeof(x) <= 4: result = builtin_clz(x.uint32).int - (32 - sizeof(x)*8)
      else: result = builtin_clzll(x.uint64).int
    else:
      when sizeof(x) <= 4: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint32)
      else: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint64)
func countTrailingZeroBits*(x: SomeInteger): int {.inline.} =
  ## Returns the number of trailing zeros in an integer.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0,
  ## otherwise the result is undefined.
  ##
  ## **See also:**
  ## * `countLeadingZeroBits proc <#countLeadingZeroBits,SomeInteger>`_
  runnableExamples:
    doAssert countTrailingZeroBits(0b0000_0001'u8) == 0
    doAssert countTrailingZeroBits(0b0000_0010'u8) == 1
    doAssert countTrailingZeroBits(0b0000_0100'u8) == 2
    doAssert countTrailingZeroBits(0b0000_1000'u8) == 3
    doAssert countTrailingZeroBits(0b0000_1111'u8) == 0
  let x = x.castToUnsigned  # shadow: operate on the unsigned bit pattern
  when noUndefined:
    if x == 0:
      return 0
  when nimvm:
    # ctz == (1-based index of lowest set bit) - 1.
    result = firstSetBit(x) - 1
  else:
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_ctz(x.uint32).int
      else: result = builtin_ctzll(x.uint64).int
    else:
      result = firstSetBit(x) - 1
when useBuiltinsRotate:
  # Per-compiler rotate intrinsics used by the rotate procs that follow.
  when defined(gcc):
    # GCC was tested until version 4.8.1 and intrinsics were present. Not tested
    # in previous versions.
    func builtin_rotl8(value: uint8, shift: cint): uint8
      {.importc: "__rolb", header: "<x86intrin.h>".}
    func builtin_rotl16(value: cushort, shift: cint): cushort
      {.importc: "__rolw", header: "<x86intrin.h>".}
    func builtin_rotl32(value: cuint, shift: cint): cuint
      {.importc: "__rold", header: "<x86intrin.h>".}
    when defined(amd64):
      # 64-bit rotate only exists on amd64 targets.
      func builtin_rotl64(value: culonglong, shift: cint): culonglong
        {.importc: "__rolq", header: "<x86intrin.h>".}
    func builtin_rotr8(value: uint8, shift: cint): uint8
      {.importc: "__rorb", header: "<x86intrin.h>".}
    func builtin_rotr16(value: cushort, shift: cint): cushort
      {.importc: "__rorw", header: "<x86intrin.h>".}
    func builtin_rotr32(value: cuint, shift: cint): cuint
      {.importc: "__rord", header: "<x86intrin.h>".}
    when defined(amd64):
      func builtin_rotr64(value: culonglong, shift: cint): culonglong
        {.importc: "__rorq", header: "<x86intrin.h>".}
  elif defined(clang):
    # In CLANG, builtins have been present since version 8.0.0 and intrinsics
    # since version 9.0.0. This implementation chose the builtins, as they have
    # been around for longer.
    # https://releases.llvm.org/8.0.0/tools/clang/docs/ReleaseNotes.html#non-comprehensive-list-of-changes-in-this-release
    # https://releases.llvm.org/8.0.0/tools/clang/docs/LanguageExtensions.html#builtin-rotateleft
    # source for correct declarations: https://github.com/llvm/llvm-project/blob/main/clang/include/clang/Basic/Builtins.def
    func builtin_rotl8(value: uint8, shift: uint8): uint8
      {.importc: "__builtin_rotateleft8", nodecl.}
    func builtin_rotl16(value: cushort, shift: cushort): cushort
      {.importc: "__builtin_rotateleft16", nodecl.}
    func builtin_rotl32(value: cuint, shift: cuint): cuint
      {.importc: "__builtin_rotateleft32", nodecl.}
    when defined(amd64):
      func builtin_rotl64(value: culonglong, shift: culonglong): culonglong
        {.importc: "__builtin_rotateleft64", nodecl.}
    func builtin_rotr8(value: uint8, shift: uint8): uint8
      {.importc: "__builtin_rotateright8", nodecl.}
    func builtin_rotr16(value: cushort, shift: cushort): cushort
      {.importc: "__builtin_rotateright16", nodecl.}
    func builtin_rotr32(value: cuint, shift: cuint): cuint
      {.importc: "__builtin_rotateright32", nodecl.}
    when defined(amd64):
      # shift is unsigned, refs https://github.com/llvm-mirror/clang/commit/892de415b7fde609dafc4e6c1643b7eaa0150a4d
      func builtin_rotr64(value: culonglong, shift: culonglong): culonglong
        {.importc: "__builtin_rotateright64", nodecl.}
  elif defined(vcc):
    # Tested on Microsoft (R) C/C++ Optimizing Compiler 19.28.29335 x64 and x86.
    # Not tested in previous versions.
    # https://docs.microsoft.com/en-us/cpp/intrinsics/rotl8-rotl16?view=msvc-160
    # https://docs.microsoft.com/en-us/cpp/intrinsics/rotr8-rotr16?view=msvc-160
    # https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/rotl-rotl64-rotr-rotr64?view=msvc-160
    func builtin_rotl8(value: uint8, shift: uint8): uint8
      {.importc: "_rotl8", header: "<intrin.h>".}
    func builtin_rotl16(value: cushort, shift: uint8): cushort
      {.importc: "_rotl16", header: "<intrin.h>".}
    # Note: the 32/64-bit variants are CRT functions from <stdlib.h>, not <intrin.h>.
    func builtin_rotl32(value: cuint, shift: cint): cuint
      {.importc: "_rotl", header: "<stdlib.h>".}
    when defined(amd64):
      func builtin_rotl64(value: culonglong, shift: cint): culonglong
        {.importc: "_rotl64", header: "<stdlib.h>".}
    func builtin_rotr8(value: uint8, shift: uint8): uint8
      {.importc: "_rotr8", header: "<intrin.h>".}
    func builtin_rotr16(value: cushort, shift: uint8): cushort
      {.importc: "_rotr16", header: "<intrin.h>".}
    func builtin_rotr32(value: cuint, shift: cint): cuint
      {.importc: "_rotr", header: "<stdlib.h>".}
    when defined(amd64):
      func builtin_rotr64(value: culonglong, shift: cint): culonglong
        {.importc: "_rotr64", header: "<stdlib.h>".}
  elif defined(icl):
    # Tested on Intel(R) C++ Intel(R) 64 Compiler Classic Version 2021.1.2 Build
    # 20201208_000000 x64 and x86. Not tested in previous versions.
    func builtin_rotl8(value: uint8, shift: cint): uint8
      {.importc: "__rolb", header: "<immintrin.h>".}
    func builtin_rotl16(value: cushort, shift: cint): cushort
      {.importc: "__rolw", header: "<immintrin.h>".}
    func builtin_rotl32(value: cuint, shift: cint): cuint
      {.importc: "__rold", header: "<immintrin.h>".}
    when defined(amd64):
      func builtin_rotl64(value: culonglong, shift: cint): culonglong
        {.importc: "__rolq", header: "<immintrin.h>".}
    func builtin_rotr8(value: uint8, shift: cint): uint8
      {.importc: "__rorb", header: "<immintrin.h>".}
    func builtin_rotr16(value: cushort, shift: cint): cushort
      {.importc: "__rorw", header: "<immintrin.h>".}
    func builtin_rotr32(value: cuint, shift: cint): cuint
      {.importc: "__rord", header: "<immintrin.h>".}
    when defined(amd64):
      func builtin_rotr64(value: culonglong, shift: cint): culonglong
        {.importc: "__rorq", header: "<immintrin.h>".}
  648. func rotl[T: SomeUnsignedInt](value: T, rot: int32): T {.inline.} =
  649. ## Left-rotate bits in a `value`.
  650. # https://stackoverflow.com/a/776523
  651. const mask = 8 * sizeof(value) - 1
  652. let rot = rot and mask
  653. (value shl rot) or (value shr ((-rot) and mask))
  654. func rotr[T: SomeUnsignedInt](value: T, rot: int32): T {.inline.} =
  655. ## Right-rotate bits in a `value`.
  656. const mask = 8 * sizeof(value) - 1
  657. let rot = rot and mask
  658. (value shr rot) or (value shl ((-rot) and mask))
func shiftTypeTo(size: static int, shift: int): auto {.inline.} =
  ## Returns the `shift` for the rotation according to the compiler and the
  ## `size`.
  ##
  ## Each backend's rotate intrinsic declares a different C type for its
  ## shift parameter (see the `builtin_rot*` declarations above); this
  ## helper converts `shift` to the type the current backend expects, so
  ## the `auto` return type varies per branch at compile time.
  when (defined(vcc) and (size in [4, 8])) or defined(gcc) or defined(icl):
    # vcc `_rotl`/`_rotl64`, the icl `__rol*`/`__ror*` family — and gcc's
    # intrinsics declared earlier in this module — take a C `int` shift.
    cint(shift)
  elif (defined(vcc) and (size in [1, 2])) or (defined(clang) and size == 1):
    # vcc `_rotl8`/`_rotl16` and clang's 8-bit builtins take an 8-bit shift.
    uint8(shift)
  elif defined(clang):
    # clang's wider builtins take a shift of the same width as the value.
    when size == 2:
      cushort(shift)
    elif size == 4:
      cuint(shift)
    elif size == 8:
      culonglong(shift)
func rotateLeftBits*[T: SomeUnsignedInt](value: T, shift: range[0..(sizeof(T) * 8)]): T {.inline.} =
  ## Left-rotate bits in a `value`.
  ## `shift` may be anything in `0 .. sizeof(T) * 8`; rotating by the full
  ## bit-width returns `value` unchanged (the fallback masks the count).
  runnableExamples:
    doAssert rotateLeftBits(0b0110_1001'u8, 4) == 0b1001_0110'u8
    doAssert rotateLeftBits(0b00111100_11000011'u16, 8) ==
      0b11000011_00111100'u16
    doAssert rotateLeftBits(0b0000111111110000_1111000000001111'u32, 16) ==
      0b1111000000001111_0000111111110000'u32
    doAssert rotateLeftBits(0b00000000111111111111111100000000_11111111000000000000000011111111'u64, 32) ==
      0b11111111000000000000000011111111_00000000111111111111111100000000'u64
  when nimvm:
    # Compile-time (VM) evaluation cannot call the importc'd intrinsics;
    # use the portable pure-Nim rotation.
    rotl(value, shift.int32)
  else:
    # `useBuiltinsRotate` is a configuration constant defined earlier in
    # this module (per-compiler availability of rotate intrinsics).
    when useBuiltinsRotate:
      const size = sizeof(T)
      when size == 1:
        builtin_rotl8(value.uint8, shiftTypeTo(size, shift)).T
      elif size == 2:
        builtin_rotl16(value.cushort, shiftTypeTo(size, shift)).T
      elif size == 4:
        builtin_rotl32(value.cuint, shiftTypeTo(size, shift)).T
      elif size == 8 and arch64:
        # The 64-bit intrinsics are only declared on 64-bit targets (see
        # the `when defined(amd64)` guards around the declarations above);
        # `arch64` is defined earlier in this module.
        builtin_rotl64(value.culonglong, shiftTypeTo(size, shift)).T
      else:
        rotl(value, shift.int32)
    else:
      rotl(value, shift.int32)
func rotateRightBits*[T: SomeUnsignedInt](value: T, shift: range[0..(sizeof(T) * 8)]): T {.inline.} =
  ## Right-rotate bits in a `value`.
  ## `shift` may be anything in `0 .. sizeof(T) * 8`; rotating by the full
  ## bit-width returns `value` unchanged (the fallback masks the count).
  runnableExamples:
    doAssert rotateRightBits(0b0110_1001'u8, 4) == 0b1001_0110'u8
    doAssert rotateRightBits(0b00111100_11000011'u16, 8) ==
      0b11000011_00111100'u16
    doAssert rotateRightBits(0b0000111111110000_1111000000001111'u32, 16) ==
      0b1111000000001111_0000111111110000'u32
    doAssert rotateRightBits(0b00000000111111111111111100000000_11111111000000000000000011111111'u64, 32) ==
      0b11111111000000000000000011111111_00000000111111111111111100000000'u64
  when nimvm:
    # Compile-time (VM) evaluation cannot call the importc'd intrinsics;
    # use the portable pure-Nim rotation.
    rotr(value, shift.int32)
  else:
    # `useBuiltinsRotate` is a configuration constant defined earlier in
    # this module (per-compiler availability of rotate intrinsics).
    when useBuiltinsRotate:
      const size = sizeof(T)
      when size == 1:
        builtin_rotr8(value.uint8, shiftTypeTo(size, shift)).T
      elif size == 2:
        builtin_rotr16(value.cushort, shiftTypeTo(size, shift)).T
      elif size == 4:
        builtin_rotr32(value.cuint, shiftTypeTo(size, shift)).T
      elif size == 8 and arch64:
        # The 64-bit intrinsics are only declared on 64-bit targets (see
        # the `when defined(amd64)` guards around the declarations above);
        # `arch64` is defined earlier in this module.
        builtin_rotr64(value.culonglong, shiftTypeTo(size, shift)).T
      else:
        rotr(value, shift.int32)
    else:
      rotr(value, shift.int32)
  727. func repeatBits[T: SomeUnsignedInt](x: SomeUnsignedInt; retType: type[T]): T =
  728. result = x
  729. var i = 1
  730. while i != (sizeof(T) div sizeof(x)):
  731. result = (result shl (sizeof(x)*8*i)) or result
  732. i *= 2
  733. func reverseBits*[T: SomeUnsignedInt](x: T): T =
  734. ## Return the bit reversal of x.
  735. runnableExamples:
  736. doAssert reverseBits(0b10100100'u8) == 0b00100101'u8
  737. doAssert reverseBits(0xdd'u8) == 0xbb'u8
  738. doAssert reverseBits(0xddbb'u16) == 0xddbb'u16
  739. doAssert reverseBits(0xdeadbeef'u32) == 0xf77db57b'u32
  740. template repeat(x: SomeUnsignedInt): T = repeatBits(x, T)
  741. result = x
  742. result =
  743. ((repeat(0x55u8) and result) shl 1) or
  744. ((repeat(0xaau8) and result) shr 1)
  745. result =
  746. ((repeat(0x33u8) and result) shl 2) or
  747. ((repeat(0xccu8) and result) shr 2)
  748. when sizeof(T) == 1:
  749. result = (result shl 4) or (result shr 4)
  750. when sizeof(T) >= 2:
  751. result =
  752. ((repeat(0x0fu8) and result) shl 4) or
  753. ((repeat(0xf0u8) and result) shr 4)
  754. when sizeof(T) == 2:
  755. result = (result shl 8) or (result shr 8)
  756. when sizeof(T) >= 4:
  757. result =
  758. ((repeat(0x00ffu16) and result) shl 8) or
  759. ((repeat(0xff00u16) and result) shr 8)
  760. when sizeof(T) == 4:
  761. result = (result shl 16) or (result shr 16)
  762. when sizeof(T) == 8:
  763. result =
  764. ((repeat(0x0000ffffu32) and result) shl 16) or
  765. ((repeat(0xffff0000u32) and result) shr 16)
  766. result = (result shl 32) or (result shr 32)