huffman_code.go 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package flate
  5. import (
  6. "math"
  7. "sort"
  8. )
  9. type huffmanEncoder struct {
  10. codeBits []uint8
  11. code []uint16
  12. }
  13. type literalNode struct {
  14. literal uint16
  15. freq int32
  16. }
  17. // A levelInfo describes the state of the constructed tree for a given depth.
  18. type levelInfo struct {
  19. // Our level. for better printing
  20. level int32
  21. // The frequency of the last node at this level
  22. lastFreq int32
  23. // The frequency of the next character to add to this level
  24. nextCharFreq int32
  25. // The frequency of the next pair (from level below) to add to this level.
  26. // Only valid if the "needed" value of the next lower level is 0.
  27. nextPairFreq int32
  28. // The number of chains remaining to generate for this level before moving
  29. // up to the next level
  30. needed int32
  31. }
  32. func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
  33. func newHuffmanEncoder(size int) *huffmanEncoder {
  34. return &huffmanEncoder{make([]uint8, size), make([]uint16, size)}
  35. }
  36. // Generates a HuffmanCode corresponding to the fixed literal table
  37. func generateFixedLiteralEncoding() *huffmanEncoder {
  38. h := newHuffmanEncoder(maxLit)
  39. codeBits := h.codeBits
  40. code := h.code
  41. var ch uint16
  42. for ch = 0; ch < maxLit; ch++ {
  43. var bits uint16
  44. var size uint8
  45. switch {
  46. case ch < 144:
  47. // size 8, 000110000 .. 10111111
  48. bits = ch + 48
  49. size = 8
  50. break
  51. case ch < 256:
  52. // size 9, 110010000 .. 111111111
  53. bits = ch + 400 - 144
  54. size = 9
  55. break
  56. case ch < 280:
  57. // size 7, 0000000 .. 0010111
  58. bits = ch - 256
  59. size = 7
  60. break
  61. default:
  62. // size 8, 11000000 .. 11000111
  63. bits = ch + 192 - 280
  64. size = 8
  65. }
  66. codeBits[ch] = size
  67. code[ch] = reverseBits(bits, size)
  68. }
  69. return h
  70. }
  71. func generateFixedOffsetEncoding() *huffmanEncoder {
  72. h := newHuffmanEncoder(30)
  73. codeBits := h.codeBits
  74. code := h.code
  75. for ch := uint16(0); ch < 30; ch++ {
  76. codeBits[ch] = 5
  77. code[ch] = reverseBits(ch, 5)
  78. }
  79. return h
  80. }
  81. var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
  82. var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
  83. func (h *huffmanEncoder) bitLength(freq []int32) int64 {
  84. var total int64
  85. for i, f := range freq {
  86. if f != 0 {
  87. total += int64(f) * int64(h.codeBits[i])
  88. }
  89. }
  90. return total
  91. }
  92. const maxBitsLimit = 16
  93. // Return the number of literals assigned to each bit size in the Huffman encoding
  94. //
  95. // This method is only called when list.length >= 3
  96. // The cases of 0, 1, and 2 literals are handled by special case code.
  97. //
  98. // list An array of the literals with non-zero frequencies
  99. // and their associated frequencies. The array is in order of increasing
  100. // frequency, and has as its last element a special element with frequency
  101. // MaxInt32
  102. // maxBits The maximum number of bits that should be used to encode any literal.
  103. // Must be less than 16.
  104. // return An integer array in which array[i] indicates the number of literals
  105. // that should be encoded in i bits.
  106. func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
  107. if maxBits >= maxBitsLimit {
  108. panic("flate: maxBits too large")
  109. }
  110. n := int32(len(list))
  111. list = list[0 : n+1]
  112. list[n] = maxNode()
  113. // The tree can't have greater depth than n - 1, no matter what. This
  114. // saves a little bit of work in some small cases
  115. if maxBits > n-1 {
  116. maxBits = n - 1
  117. }
  118. // Create information about each of the levels.
  119. // A bogus "Level 0" whose sole purpose is so that
  120. // level1.prev.needed==0. This makes level1.nextPairFreq
  121. // be a legitimate value that never gets chosen.
  122. var levels [maxBitsLimit]levelInfo
  123. // leafCounts[i] counts the number of literals at the left
  124. // of ancestors of the rightmost node at level i.
  125. // leafCounts[i][j] is the number of literals at the left
  126. // of the level j ancestor.
  127. var leafCounts [maxBitsLimit][maxBitsLimit]int32
  128. for level := int32(1); level <= maxBits; level++ {
  129. // For every level, the first two items are the first two characters.
  130. // We initialize the levels as if we had already figured this out.
  131. levels[level] = levelInfo{
  132. level: level,
  133. lastFreq: list[1].freq,
  134. nextCharFreq: list[2].freq,
  135. nextPairFreq: list[0].freq + list[1].freq,
  136. }
  137. leafCounts[level][level] = 2
  138. if level == 1 {
  139. levels[level].nextPairFreq = math.MaxInt32
  140. }
  141. }
  142. // We need a total of 2*n - 2 items at top level and have already generated 2.
  143. levels[maxBits].needed = 2*n - 4
  144. level := maxBits
  145. for {
  146. l := &levels[level]
  147. if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
  148. // We've run out of both leafs and pairs.
  149. // End all calculations for this level.
  150. // To make sure we never come back to this level or any lower level,
  151. // set nextPairFreq impossibly large.
  152. l.needed = 0
  153. levels[level+1].nextPairFreq = math.MaxInt32
  154. level++
  155. continue
  156. }
  157. prevFreq := l.lastFreq
  158. if l.nextCharFreq < l.nextPairFreq {
  159. // The next item on this row is a leaf node.
  160. n := leafCounts[level][level] + 1
  161. l.lastFreq = l.nextCharFreq
  162. // Lower leafCounts are the same of the previous node.
  163. leafCounts[level][level] = n
  164. l.nextCharFreq = list[n].freq
  165. } else {
  166. // The next item on this row is a pair from the previous row.
  167. // nextPairFreq isn't valid until we generate two
  168. // more values in the level below
  169. l.lastFreq = l.nextPairFreq
  170. // Take leaf counts from the lower level, except counts[level] remains the same.
  171. copy(leafCounts[level][:level], leafCounts[level-1][:level])
  172. levels[l.level-1].needed = 2
  173. }
  174. if l.needed--; l.needed == 0 {
  175. // We've done everything we need to do for this level.
  176. // Continue calculating one level up. Fill in nextPairFreq
  177. // of that level with the sum of the two nodes we've just calculated on
  178. // this level.
  179. if l.level == maxBits {
  180. // All done!
  181. break
  182. }
  183. levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
  184. level++
  185. } else {
  186. // If we stole from below, move down temporarily to replenish it.
  187. for levels[level-1].needed > 0 {
  188. level--
  189. }
  190. }
  191. }
  192. // Somethings is wrong if at the end, the top level is null or hasn't used
  193. // all of the leaves.
  194. if leafCounts[maxBits][maxBits] != n {
  195. panic("leafCounts[maxBits][maxBits] != n")
  196. }
  197. bitCount := make([]int32, maxBits+1)
  198. bits := 1
  199. counts := &leafCounts[maxBits]
  200. for level := maxBits; level > 0; level-- {
  201. // chain.leafCount gives the number of literals requiring at least "bits"
  202. // bits to encode.
  203. bitCount[bits] = counts[level] - counts[level-1]
  204. bits++
  205. }
  206. return bitCount
  207. }
  208. // Look at the leaves and assign them a bit count and an encoding as specified
  209. // in RFC 1951 3.2.2
  210. func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
  211. code := uint16(0)
  212. for n, bits := range bitCount {
  213. code <<= 1
  214. if n == 0 || bits == 0 {
  215. continue
  216. }
  217. // The literals list[len(list)-bits] .. list[len(list)-bits]
  218. // are encoded using "bits" bits, and get the values
  219. // code, code + 1, .... The code values are
  220. // assigned in literal order (not frequency order).
  221. chunk := list[len(list)-int(bits):]
  222. sortByLiteral(chunk)
  223. for _, node := range chunk {
  224. h.codeBits[node.literal] = uint8(n)
  225. h.code[node.literal] = reverseBits(code, uint8(n))
  226. code++
  227. }
  228. list = list[0 : len(list)-int(bits)]
  229. }
  230. }
  231. // Update this Huffman Code object to be the minimum code for the specified frequency count.
  232. //
  233. // freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
  234. // maxBits The maximum number of bits to use for any literal.
  235. func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
  236. list := make([]literalNode, len(freq)+1)
  237. // Number of non-zero literals
  238. count := 0
  239. // Set list to be the set of all non-zero literals and their frequencies
  240. for i, f := range freq {
  241. if f != 0 {
  242. list[count] = literalNode{uint16(i), f}
  243. count++
  244. } else {
  245. h.codeBits[i] = 0
  246. }
  247. }
  248. // If freq[] is shorter than codeBits[], fill rest of codeBits[] with zeros
  249. h.codeBits = h.codeBits[0:len(freq)]
  250. list = list[0:count]
  251. if count <= 2 {
  252. // Handle the small cases here, because they are awkward for the general case code. With
  253. // two or fewer literals, everything has bit length 1.
  254. for i, node := range list {
  255. // "list" is in order of increasing literal value.
  256. h.codeBits[node.literal] = 1
  257. h.code[node.literal] = uint16(i)
  258. }
  259. return
  260. }
  261. sortByFreq(list)
  262. // Get the number of literals for each bit count
  263. bitCount := h.bitCounts(list, maxBits)
  264. // And do the assignment
  265. h.assignEncodingAndSize(bitCount, list)
  266. }
  267. type literalNodeSorter struct {
  268. a []literalNode
  269. less func(i, j int) bool
  270. }
  271. func (s literalNodeSorter) Len() int { return len(s.a) }
  272. func (s literalNodeSorter) Less(i, j int) bool {
  273. return s.less(i, j)
  274. }
  275. func (s literalNodeSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] }
  276. func sortByFreq(a []literalNode) {
  277. s := &literalNodeSorter{a, func(i, j int) bool {
  278. if a[i].freq == a[j].freq {
  279. return a[i].literal < a[j].literal
  280. }
  281. return a[i].freq < a[j].freq
  282. }}
  283. sort.Sort(s)
  284. }
  285. func sortByLiteral(a []literalNode) {
  286. s := &literalNodeSorter{a, func(i, j int) bool { return a[i].literal < a[j].literal }}
  287. sort.Sort(s)
  288. }