inflate.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. //go:generate go run gen.go -output fixedhuff.go
  5. // Package flate implements the DEFLATE compressed data format, described in
  6. // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
  7. // formats.
  8. package flate
  9. import (
  10. "bufio"
  11. "io"
  12. "strconv"
  13. )
  14. const (
  15. maxCodeLen = 16 // max length of Huffman code
  16. maxHist = 32768 // max history required
  17. // The next three numbers come from the RFC, section 3.2.7.
  18. maxLit = 286
  19. maxDist = 32
  20. numCodes = 19 // number of codes in Huffman meta-code
  21. )
  22. // A CorruptInputError reports the presence of corrupt input at a given offset.
  23. type CorruptInputError int64
  24. func (e CorruptInputError) Error() string {
  25. return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
  26. }
  27. // An InternalError reports an error in the flate code itself.
  28. type InternalError string
  29. func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
  30. // A ReadError reports an error encountered while reading input.
  31. type ReadError struct {
  32. Offset int64 // byte offset where error occurred
  33. Err error // error returned by underlying Read
  34. }
  35. func (e *ReadError) Error() string {
  36. return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
  37. }
  38. // A WriteError reports an error encountered while writing output.
  39. type WriteError struct {
  40. Offset int64 // byte offset where error occurred
  41. Err error // error returned by underlying Write
  42. }
  43. func (e *WriteError) Error() string {
  44. return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
  45. }
  46. // Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
  47. // to switch to a new underlying Reader. This permits reusing a ReadCloser
  48. // instead of allocating a new one.
  49. type Resetter interface {
  50. // Reset discards any buffered data and resets the Resetter as if it was
  51. // newly initialized with the given reader.
  52. Reset(r io.Reader, dict []byte) error
  53. }
  54. // Note that much of the implementation of huffmanDecoder is also copied
  55. // into gen.go (in package main) for the purpose of precomputing the
  56. // fixed huffman tables so they can be included statically.
  57. // The data structure for decoding Huffman tables is based on that of
  58. // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
  59. // For codes smaller than the table width, there are multiple entries
  60. // (each combination of trailing bits has the same value). For codes
  61. // larger than the table width, the table contains a link to an overflow
  62. // table. The width of each entry in the link table is the maximum code
  63. // size minus the chunk width.
  64. // Note that you can do a lookup in the table even without all bits
  65. // filled. Since the extra bits are zero, and the DEFLATE Huffman codes
  66. // have the property that shorter codes come before longer ones, the
  67. // bit length estimate in the result is a lower bound on the actual
  68. // number of bits.
  69. // chunk & 15 is number of bits
  70. // chunk >> 4 is value, including table link
  71. const (
  72. huffmanChunkBits = 9
  73. huffmanNumChunks = 1 << huffmanChunkBits
  74. huffmanCountMask = 15
  75. huffmanValueShift = 4
  76. )
  77. type huffmanDecoder struct {
  78. min int // the minimum code length
  79. chunks [huffmanNumChunks]uint32 // chunks as described above
  80. links [][]uint32 // overflow links
  81. linkMask uint32 // mask the width of the link table
  82. }
  83. // Initialize Huffman decoding tables from array of code lengths.
  84. func (h *huffmanDecoder) init(bits []int) bool {
  85. if h.min != 0 {
  86. *h = huffmanDecoder{}
  87. }
  88. // Count number of codes of each length,
  89. // compute min and max length.
  90. var count [maxCodeLen]int
  91. var min, max int
  92. for _, n := range bits {
  93. if n == 0 {
  94. continue
  95. }
  96. if min == 0 || n < min {
  97. min = n
  98. }
  99. if n > max {
  100. max = n
  101. }
  102. count[n]++
  103. }
  104. if max == 0 {
  105. return false
  106. }
  107. h.min = min
  108. var linkBits uint
  109. var numLinks int
  110. if max > huffmanChunkBits {
  111. linkBits = uint(max) - huffmanChunkBits
  112. numLinks = 1 << linkBits
  113. h.linkMask = uint32(numLinks - 1)
  114. }
  115. code := 0
  116. var nextcode [maxCodeLen]int
  117. for i := min; i <= max; i++ {
  118. if i == huffmanChunkBits+1 {
  119. // create link tables
  120. link := code >> 1
  121. if huffmanNumChunks < link {
  122. return false
  123. }
  124. h.links = make([][]uint32, huffmanNumChunks-link)
  125. for j := uint(link); j < huffmanNumChunks; j++ {
  126. reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
  127. reverse >>= uint(16 - huffmanChunkBits)
  128. off := j - uint(link)
  129. h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i))
  130. h.links[off] = make([]uint32, 1<<linkBits)
  131. }
  132. }
  133. n := count[i]
  134. nextcode[i] = code
  135. code += n
  136. code <<= 1
  137. }
  138. for i, n := range bits {
  139. if n == 0 {
  140. continue
  141. }
  142. code := nextcode[n]
  143. nextcode[n]++
  144. chunk := uint32(i<<huffmanValueShift | n)
  145. reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
  146. reverse >>= uint(16 - n)
  147. if n <= huffmanChunkBits {
  148. for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) {
  149. h.chunks[off] = chunk
  150. }
  151. } else {
  152. value := h.chunks[reverse&(huffmanNumChunks-1)] >> huffmanValueShift
  153. if value >= uint32(len(h.links)) {
  154. return false
  155. }
  156. linktab := h.links[value]
  157. reverse >>= huffmanChunkBits
  158. for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) {
  159. linktab[off] = chunk
  160. }
  161. }
  162. }
  163. return true
  164. }
  165. // The actual read interface needed by NewReader.
  166. // If the passed in io.Reader does not also have ReadByte,
  167. // the NewReader will introduce its own buffering.
  168. type Reader interface {
  169. io.Reader
  170. io.ByteReader
  171. }
  172. // Decompress state.
  173. type decompressor struct {
  174. // Input source.
  175. r Reader
  176. roffset int64
  177. woffset int64
  178. // Input bits, in top of b.
  179. b uint32
  180. nb uint
  181. // Huffman decoders for literal/length, distance.
  182. h1, h2 huffmanDecoder
  183. // Length arrays used to define Huffman codes.
  184. bits *[maxLit + maxDist]int
  185. codebits *[numCodes]int
  186. // Output history, buffer.
  187. hist *[maxHist]byte
  188. hp int // current output position in buffer
  189. hw int // have written hist[0:hw] already
  190. hfull bool // buffer has filled at least once
  191. // Temporary buffer (avoids repeated allocation).
  192. buf [4]byte
  193. // Next step in the decompression,
  194. // and decompression state.
  195. step func(*decompressor)
  196. final bool
  197. err error
  198. toRead []byte
  199. hl, hd *huffmanDecoder
  200. copyLen int
  201. copyDist int
  202. }
  203. func (f *decompressor) nextBlock() {
  204. if f.final {
  205. if f.hw != f.hp {
  206. f.flush((*decompressor).nextBlock)
  207. return
  208. }
  209. f.err = io.EOF
  210. return
  211. }
  212. for f.nb < 1+2 {
  213. if f.err = f.moreBits(); f.err != nil {
  214. return
  215. }
  216. }
  217. f.final = f.b&1 == 1
  218. f.b >>= 1
  219. typ := f.b & 3
  220. f.b >>= 2
  221. f.nb -= 1 + 2
  222. switch typ {
  223. case 0:
  224. f.dataBlock()
  225. case 1:
  226. // compressed, fixed Huffman tables
  227. f.hl = &fixedHuffmanDecoder
  228. f.hd = nil
  229. f.huffmanBlock()
  230. case 2:
  231. // compressed, dynamic Huffman tables
  232. if f.err = f.readHuffman(); f.err != nil {
  233. break
  234. }
  235. f.hl = &f.h1
  236. f.hd = &f.h2
  237. f.huffmanBlock()
  238. default:
  239. // 3 is reserved.
  240. f.err = CorruptInputError(f.roffset)
  241. }
  242. }
  243. func (f *decompressor) Read(b []byte) (int, error) {
  244. for {
  245. if len(f.toRead) > 0 {
  246. n := copy(b, f.toRead)
  247. f.toRead = f.toRead[n:]
  248. return n, nil
  249. }
  250. if f.err != nil {
  251. return 0, f.err
  252. }
  253. f.step(f)
  254. }
  255. }
  256. func (f *decompressor) Close() error {
  257. if f.err == io.EOF {
  258. return nil
  259. }
  260. return f.err
  261. }
  262. // RFC 1951 section 3.2.7.
  263. // Compression with dynamic Huffman codes
  264. var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
  265. func (f *decompressor) readHuffman() error {
  266. // HLIT[5], HDIST[5], HCLEN[4].
  267. for f.nb < 5+5+4 {
  268. if err := f.moreBits(); err != nil {
  269. return err
  270. }
  271. }
  272. nlit := int(f.b&0x1F) + 257
  273. if nlit > maxLit {
  274. return CorruptInputError(f.roffset)
  275. }
  276. f.b >>= 5
  277. ndist := int(f.b&0x1F) + 1
  278. // maxDist is 32, so ndist is always valid.
  279. f.b >>= 5
  280. nclen := int(f.b&0xF) + 4
  281. // numCodes is 19, so nclen is always valid.
  282. f.b >>= 4
  283. f.nb -= 5 + 5 + 4
  284. // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
  285. for i := 0; i < nclen; i++ {
  286. for f.nb < 3 {
  287. if err := f.moreBits(); err != nil {
  288. return err
  289. }
  290. }
  291. f.codebits[codeOrder[i]] = int(f.b & 0x7)
  292. f.b >>= 3
  293. f.nb -= 3
  294. }
  295. for i := nclen; i < len(codeOrder); i++ {
  296. f.codebits[codeOrder[i]] = 0
  297. }
  298. if !f.h1.init(f.codebits[0:]) {
  299. return CorruptInputError(f.roffset)
  300. }
  301. // HLIT + 257 code lengths, HDIST + 1 code lengths,
  302. // using the code length Huffman code.
  303. for i, n := 0, nlit+ndist; i < n; {
  304. x, err := f.huffSym(&f.h1)
  305. if err != nil {
  306. return err
  307. }
  308. if x < 16 {
  309. // Actual length.
  310. f.bits[i] = x
  311. i++
  312. continue
  313. }
  314. // Repeat previous length or zero.
  315. var rep int
  316. var nb uint
  317. var b int
  318. switch x {
  319. default:
  320. return InternalError("unexpected length code")
  321. case 16:
  322. rep = 3
  323. nb = 2
  324. if i == 0 {
  325. return CorruptInputError(f.roffset)
  326. }
  327. b = f.bits[i-1]
  328. case 17:
  329. rep = 3
  330. nb = 3
  331. b = 0
  332. case 18:
  333. rep = 11
  334. nb = 7
  335. b = 0
  336. }
  337. for f.nb < nb {
  338. if err := f.moreBits(); err != nil {
  339. return err
  340. }
  341. }
  342. rep += int(f.b & uint32(1<<nb-1))
  343. f.b >>= nb
  344. f.nb -= nb
  345. if i+rep > n {
  346. return CorruptInputError(f.roffset)
  347. }
  348. for j := 0; j < rep; j++ {
  349. f.bits[i] = b
  350. i++
  351. }
  352. }
  353. if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
  354. return CorruptInputError(f.roffset)
  355. }
  356. return nil
  357. }
  358. // Decode a single Huffman block from f.
  359. // hl and hd are the Huffman states for the lit/length values
  360. // and the distance values, respectively. If hd == nil, using the
  361. // fixed distance encoding associated with fixed Huffman blocks.
  362. func (f *decompressor) huffmanBlock() {
  363. for {
  364. v, err := f.huffSym(f.hl)
  365. if err != nil {
  366. f.err = err
  367. return
  368. }
  369. var n uint // number of bits extra
  370. var length int
  371. switch {
  372. case v < 256:
  373. f.hist[f.hp] = byte(v)
  374. f.hp++
  375. if f.hp == len(f.hist) {
  376. // After the flush, continue this loop.
  377. f.flush((*decompressor).huffmanBlock)
  378. return
  379. }
  380. continue
  381. case v == 256:
  382. // Done with huffman block; read next block.
  383. f.step = (*decompressor).nextBlock
  384. return
  385. // otherwise, reference to older data
  386. case v < 265:
  387. length = v - (257 - 3)
  388. n = 0
  389. case v < 269:
  390. length = v*2 - (265*2 - 11)
  391. n = 1
  392. case v < 273:
  393. length = v*4 - (269*4 - 19)
  394. n = 2
  395. case v < 277:
  396. length = v*8 - (273*8 - 35)
  397. n = 3
  398. case v < 281:
  399. length = v*16 - (277*16 - 67)
  400. n = 4
  401. case v < 285:
  402. length = v*32 - (281*32 - 131)
  403. n = 5
  404. default:
  405. length = 258
  406. n = 0
  407. }
  408. if n > 0 {
  409. for f.nb < n {
  410. if err = f.moreBits(); err != nil {
  411. f.err = err
  412. return
  413. }
  414. }
  415. length += int(f.b & uint32(1<<n-1))
  416. f.b >>= n
  417. f.nb -= n
  418. }
  419. var dist int
  420. if f.hd == nil {
  421. for f.nb < 5 {
  422. if err = f.moreBits(); err != nil {
  423. f.err = err
  424. return
  425. }
  426. }
  427. dist = int(reverseByte[(f.b&0x1F)<<3])
  428. f.b >>= 5
  429. f.nb -= 5
  430. } else {
  431. if dist, err = f.huffSym(f.hd); err != nil {
  432. f.err = err
  433. return
  434. }
  435. }
  436. switch {
  437. case dist < 4:
  438. dist++
  439. case dist >= 30:
  440. f.err = CorruptInputError(f.roffset)
  441. return
  442. default:
  443. nb := uint(dist-2) >> 1
  444. // have 1 bit in bottom of dist, need nb more.
  445. extra := (dist & 1) << nb
  446. for f.nb < nb {
  447. if err = f.moreBits(); err != nil {
  448. f.err = err
  449. return
  450. }
  451. }
  452. extra |= int(f.b & uint32(1<<nb-1))
  453. f.b >>= nb
  454. f.nb -= nb
  455. dist = 1<<(nb+1) + 1 + extra
  456. }
  457. // Copy history[-dist:-dist+length] into output.
  458. if dist > len(f.hist) {
  459. f.err = InternalError("bad history distance")
  460. return
  461. }
  462. // No check on length; encoding can be prescient.
  463. if !f.hfull && dist > f.hp {
  464. f.err = CorruptInputError(f.roffset)
  465. return
  466. }
  467. f.copyLen, f.copyDist = length, dist
  468. if f.copyHist() {
  469. return
  470. }
  471. }
  472. }
  473. // copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself.
  474. // It reports whether the f.hist buffer is full.
  475. func (f *decompressor) copyHist() bool {
  476. p := f.hp - f.copyDist
  477. if p < 0 {
  478. p += len(f.hist)
  479. }
  480. for f.copyLen > 0 {
  481. n := f.copyLen
  482. if x := len(f.hist) - f.hp; n > x {
  483. n = x
  484. }
  485. if x := len(f.hist) - p; n > x {
  486. n = x
  487. }
  488. forwardCopy(f.hist[:], f.hp, p, n)
  489. p += n
  490. f.hp += n
  491. f.copyLen -= n
  492. if f.hp == len(f.hist) {
  493. // After flush continue copying out of history.
  494. f.flush((*decompressor).copyHuff)
  495. return true
  496. }
  497. if p == len(f.hist) {
  498. p = 0
  499. }
  500. }
  501. return false
  502. }
  503. func (f *decompressor) copyHuff() {
  504. if f.copyHist() {
  505. return
  506. }
  507. f.huffmanBlock()
  508. }
  509. // Copy a single uncompressed data block from input to output.
  510. func (f *decompressor) dataBlock() {
  511. // Uncompressed.
  512. // Discard current half-byte.
  513. f.nb = 0
  514. f.b = 0
  515. // Length then ones-complement of length.
  516. nr, err := io.ReadFull(f.r, f.buf[0:4])
  517. f.roffset += int64(nr)
  518. if err != nil {
  519. f.err = &ReadError{f.roffset, err}
  520. return
  521. }
  522. n := int(f.buf[0]) | int(f.buf[1])<<8
  523. nn := int(f.buf[2]) | int(f.buf[3])<<8
  524. if uint16(nn) != uint16(^n) {
  525. f.err = CorruptInputError(f.roffset)
  526. return
  527. }
  528. if n == 0 {
  529. // 0-length block means sync
  530. f.flush((*decompressor).nextBlock)
  531. return
  532. }
  533. f.copyLen = n
  534. f.copyData()
  535. }
  536. // copyData copies f.copyLen bytes from the underlying reader into f.hist.
  537. // It pauses for reads when f.hist is full.
  538. func (f *decompressor) copyData() {
  539. n := f.copyLen
  540. for n > 0 {
  541. m := len(f.hist) - f.hp
  542. if m > n {
  543. m = n
  544. }
  545. m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m])
  546. f.roffset += int64(m)
  547. if err != nil {
  548. f.err = &ReadError{f.roffset, err}
  549. return
  550. }
  551. n -= m
  552. f.hp += m
  553. if f.hp == len(f.hist) {
  554. f.copyLen = n
  555. f.flush((*decompressor).copyData)
  556. return
  557. }
  558. }
  559. f.step = (*decompressor).nextBlock
  560. }
  561. func (f *decompressor) setDict(dict []byte) {
  562. if len(dict) > len(f.hist) {
  563. // Will only remember the tail.
  564. dict = dict[len(dict)-len(f.hist):]
  565. }
  566. f.hp = copy(f.hist[:], dict)
  567. if f.hp == len(f.hist) {
  568. f.hp = 0
  569. f.hfull = true
  570. }
  571. f.hw = f.hp
  572. }
  573. func (f *decompressor) moreBits() error {
  574. c, err := f.r.ReadByte()
  575. if err != nil {
  576. if err == io.EOF {
  577. err = io.ErrUnexpectedEOF
  578. }
  579. return err
  580. }
  581. f.roffset++
  582. f.b |= uint32(c) << f.nb
  583. f.nb += 8
  584. return nil
  585. }
  586. // Read the next Huffman-encoded symbol from f according to h.
  587. func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
  588. n := uint(h.min)
  589. for {
  590. for f.nb < n {
  591. if err := f.moreBits(); err != nil {
  592. return 0, err
  593. }
  594. }
  595. chunk := h.chunks[f.b&(huffmanNumChunks-1)]
  596. n = uint(chunk & huffmanCountMask)
  597. if n > huffmanChunkBits {
  598. chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask]
  599. n = uint(chunk & huffmanCountMask)
  600. if n == 0 {
  601. f.err = CorruptInputError(f.roffset)
  602. return 0, f.err
  603. }
  604. }
  605. if n <= f.nb {
  606. f.b >>= n
  607. f.nb -= n
  608. return int(chunk >> huffmanValueShift), nil
  609. }
  610. }
  611. }
  612. // Flush any buffered output to the underlying writer.
  613. func (f *decompressor) flush(step func(*decompressor)) {
  614. f.toRead = f.hist[f.hw:f.hp]
  615. f.woffset += int64(f.hp - f.hw)
  616. f.hw = f.hp
  617. if f.hp == len(f.hist) {
  618. f.hp = 0
  619. f.hw = 0
  620. f.hfull = true
  621. }
  622. f.step = step
  623. }
  624. func makeReader(r io.Reader) Reader {
  625. if rr, ok := r.(Reader); ok {
  626. return rr
  627. }
  628. return bufio.NewReader(r)
  629. }
  630. func (f *decompressor) Reset(r io.Reader, dict []byte) error {
  631. *f = decompressor{
  632. r: makeReader(r),
  633. bits: f.bits,
  634. codebits: f.codebits,
  635. hist: f.hist,
  636. step: (*decompressor).nextBlock,
  637. }
  638. if dict != nil {
  639. f.setDict(dict)
  640. }
  641. return nil
  642. }
  643. // NewReader returns a new ReadCloser that can be used
  644. // to read the uncompressed version of r.
  645. // If r does not also implement io.ByteReader,
  646. // the decompressor may read more data than necessary from r.
  647. // It is the caller's responsibility to call Close on the ReadCloser
  648. // when finished reading.
  649. //
  650. // The ReadCloser returned by NewReader also implements Resetter.
  651. func NewReader(r io.Reader) io.ReadCloser {
  652. var f decompressor
  653. f.bits = new([maxLit + maxDist]int)
  654. f.codebits = new([numCodes]int)
  655. f.r = makeReader(r)
  656. f.hist = new([maxHist]byte)
  657. f.step = (*decompressor).nextBlock
  658. return &f
  659. }
  660. // NewReaderDict is like NewReader but initializes the reader
  661. // with a preset dictionary. The returned Reader behaves as if
  662. // the uncompressed data stream started with the given dictionary,
  663. // which has already been read. NewReaderDict is typically used
  664. // to read data compressed by NewWriterDict.
  665. //
  666. // The ReadCloser returned by NewReader also implements Resetter.
  667. func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
  668. var f decompressor
  669. f.r = makeReader(r)
  670. f.hist = new([maxHist]byte)
  671. f.bits = new([maxLit + maxDist]int)
  672. f.codebits = new([numCodes]int)
  673. f.step = (*decompressor).nextBlock
  674. f.setDict(dict)
  675. return &f
  676. }