countbits_impl.nim 3.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## Contains the used algorithms for counting bits.
  10. from std/private/bitops_utils import forwardImpl, castToUnsigned
  11. const useBuiltins* = not defined(noIntrinsicsBitOpts)
  12. const noUndefined* = defined(noUndefinedBitOpts)
  13. const useGCC_builtins* = (defined(gcc) or defined(llvm_gcc) or
  14. defined(clang)) and useBuiltins
  15. const useICC_builtins* = defined(icc) and useBuiltins
  16. const useVCC_builtins* = defined(vcc) and useBuiltins
  17. const arch64* = sizeof(int) == 8
  18. template countBitsImpl(n: uint32): int =
  19. # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  20. var v = uint32(n)
  21. v = v - ((v shr 1'u32) and 0x55555555'u32)
  22. v = (v and 0x33333333'u32) + ((v shr 2'u32) and 0x33333333'u32)
  23. (((v + (v shr 4'u32) and 0xF0F0F0F'u32) * 0x1010101'u32) shr 24'u32).int
  24. template countBitsImpl(n: uint64): int =
  25. # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  26. var v = uint64(n)
  27. v = v - ((v shr 1'u64) and 0x5555555555555555'u64)
  28. v = (v and 0x3333333333333333'u64) + ((v shr 2'u64) and 0x3333333333333333'u64)
  29. v = (v + (v shr 4'u64) and 0x0F0F0F0F0F0F0F0F'u64)
  30. ((v * 0x0101010101010101'u64) shr 56'u64).int
  31. when useGCC_builtins:
  32. # Returns the number of set 1-bits in value.
  33. proc builtin_popcount(x: cuint): cint {.importc: "__builtin_popcount", cdecl.}
  34. proc builtin_popcountll(x: culonglong): cint {.
  35. importc: "__builtin_popcountll", cdecl.}
  36. elif useVCC_builtins:
  37. # Counts the number of one bits (population count) in a 16-, 32-, or 64-byte unsigned integer.
  38. func builtin_popcnt16(a2: uint16): uint16 {.
  39. importc: "__popcnt16", header: "<intrin.h>".}
  40. func builtin_popcnt32(a2: uint32): uint32 {.
  41. importc: "__popcnt", header: "<intrin.h>".}
  42. func builtin_popcnt64(a2: uint64): uint64 {.
  43. importc: "__popcnt64", header: "<intrin.h>".}
  44. elif useICC_builtins:
  45. # Intel compiler intrinsics: http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm
  46. # see also: https://software.intel.com/en-us/node/523362
  47. # Count the number of bits set to 1 in an integer a, and return that count in dst.
  48. func builtin_popcnt32(a: cint): cint {.
  49. importc: "_popcnt", header: "<immintrin.h>".}
  50. func builtin_popcnt64(a: uint64): cint {.
  51. importc: "_popcnt64", header: "<immintrin.h>".}
  52. func countSetBitsImpl*(x: SomeInteger): int {.inline.} =
  53. ## Counts the set bits in an integer (also called `Hamming weight`:idx:).
  54. # TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT.
  55. # like GCC and MSVC
  56. let x = x.castToUnsigned
  57. when nimvm:
  58. result = forwardImpl(countBitsImpl, x)
  59. else:
  60. when useGCC_builtins:
  61. when sizeof(x) <= 4: result = builtin_popcount(x.cuint).int
  62. else: result = builtin_popcountll(x.culonglong).int
  63. elif useVCC_builtins:
  64. when sizeof(x) <= 2: result = builtin_popcnt16(x.uint16).int
  65. elif sizeof(x) <= 4: result = builtin_popcnt32(x.uint32).int
  66. elif arch64: result = builtin_popcnt64(x.uint64).int
  67. else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).uint32).int +
  68. builtin_popcnt32((x.uint64 shr 32'u64).uint32).int
  69. elif useICC_builtins:
  70. when sizeof(x) <= 4: result = builtin_popcnt32(x.cint).int
  71. elif arch64: result = builtin_popcnt64(x.uint64).int
  72. else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).cint).int +
  73. builtin_popcnt32((x.uint64 shr 32'u64).cint).int
  74. else:
  75. when sizeof(x) <= 4: result = countBitsImpl(x.uint32)
  76. else: result = countBitsImpl(x.uint64)
  77. proc countBits32*(n: uint32): int {.compilerproc, inline.} =
  78. result = countSetBitsImpl(n)
  79. proc countBits64*(n: uint64): int {.compilerproc, inline.} =
  80. result = countSetBitsImpl(n)