decoder.go 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. // Copyright 2014-2022 Ulrich Kunitz. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package lzma
  5. import (
  6. "errors"
  7. "fmt"
  8. "io"
  9. )
  10. // decoder decodes a raw LZMA stream without any header.
  11. type decoder struct {
  12. // dictionary; the rear pointer of the buffer will be used for
  13. // reading the data.
  14. Dict *decoderDict
  15. // decoder state
  16. State *state
  17. // range decoder
  18. rd *rangeDecoder
  19. // start stores the head value of the dictionary for the LZMA
  20. // stream
  21. start int64
  22. // size of uncompressed data
  23. size int64
  24. // end-of-stream encountered
  25. eos bool
  26. // EOS marker found
  27. eosMarker bool
  28. }
  29. // newDecoder creates a new decoder instance. The parameter size provides
  30. // the expected byte size of the decompressed data. If the size is
  31. // unknown use a negative value. In that case the decoder will look for
  32. // a terminating end-of-stream marker.
  33. func newDecoder(br io.ByteReader, state *state, dict *decoderDict, size int64) (d *decoder, err error) {
  34. rd, err := newRangeDecoder(br)
  35. if err != nil {
  36. return nil, err
  37. }
  38. d = &decoder{
  39. State: state,
  40. Dict: dict,
  41. rd: rd,
  42. size: size,
  43. start: dict.pos(),
  44. }
  45. return d, nil
  46. }
  47. // Reopen restarts the decoder with a new byte reader and a new size. Reopen
  48. // resets the Decompressed counter to zero.
  49. func (d *decoder) Reopen(br io.ByteReader, size int64) error {
  50. var err error
  51. if d.rd, err = newRangeDecoder(br); err != nil {
  52. return err
  53. }
  54. d.start = d.Dict.pos()
  55. d.size = size
  56. d.eos = false
  57. return nil
  58. }
  59. // decodeLiteral decodes a single literal from the LZMA stream.
  60. func (d *decoder) decodeLiteral() (op operation, err error) {
  61. litState := d.State.litState(d.Dict.byteAt(1), d.Dict.head)
  62. match := d.Dict.byteAt(int(d.State.rep[0]) + 1)
  63. s, err := d.State.litCodec.Decode(d.rd, d.State.state, match, litState)
  64. if err != nil {
  65. return nil, err
  66. }
  67. return lit{s}, nil
  68. }
  69. // errEOS indicates that an EOS marker has been found.
  70. var errEOS = errors.New("EOS marker found")
  71. // readOp decodes the next operation from the compressed stream. It
  72. // returns the operation. If an explicit end of stream marker is
  73. // identified the eos error is returned.
  74. func (d *decoder) readOp() (op operation, err error) {
  75. // Value of the end of stream (EOS) marker
  76. const eosDist = 1<<32 - 1
  77. state, state2, posState := d.State.states(d.Dict.head)
  78. b, err := d.State.isMatch[state2].Decode(d.rd)
  79. if err != nil {
  80. return nil, err
  81. }
  82. if b == 0 {
  83. // literal
  84. op, err := d.decodeLiteral()
  85. if err != nil {
  86. return nil, err
  87. }
  88. d.State.updateStateLiteral()
  89. return op, nil
  90. }
  91. b, err = d.State.isRep[state].Decode(d.rd)
  92. if err != nil {
  93. return nil, err
  94. }
  95. if b == 0 {
  96. // simple match
  97. d.State.rep[3], d.State.rep[2], d.State.rep[1] =
  98. d.State.rep[2], d.State.rep[1], d.State.rep[0]
  99. d.State.updateStateMatch()
  100. // The length decoder returns the length offset.
  101. n, err := d.State.lenCodec.Decode(d.rd, posState)
  102. if err != nil {
  103. return nil, err
  104. }
  105. // The dist decoder returns the distance offset. The actual
  106. // distance is 1 higher.
  107. d.State.rep[0], err = d.State.distCodec.Decode(d.rd, n)
  108. if err != nil {
  109. return nil, err
  110. }
  111. if d.State.rep[0] == eosDist {
  112. d.eosMarker = true
  113. return nil, errEOS
  114. }
  115. op = match{n: int(n) + minMatchLen,
  116. distance: int64(d.State.rep[0]) + minDistance}
  117. return op, nil
  118. }
  119. b, err = d.State.isRepG0[state].Decode(d.rd)
  120. if err != nil {
  121. return nil, err
  122. }
  123. dist := d.State.rep[0]
  124. if b == 0 {
  125. // rep match 0
  126. b, err = d.State.isRepG0Long[state2].Decode(d.rd)
  127. if err != nil {
  128. return nil, err
  129. }
  130. if b == 0 {
  131. d.State.updateStateShortRep()
  132. op = match{n: 1, distance: int64(dist) + minDistance}
  133. return op, nil
  134. }
  135. } else {
  136. b, err = d.State.isRepG1[state].Decode(d.rd)
  137. if err != nil {
  138. return nil, err
  139. }
  140. if b == 0 {
  141. dist = d.State.rep[1]
  142. } else {
  143. b, err = d.State.isRepG2[state].Decode(d.rd)
  144. if err != nil {
  145. return nil, err
  146. }
  147. if b == 0 {
  148. dist = d.State.rep[2]
  149. } else {
  150. dist = d.State.rep[3]
  151. d.State.rep[3] = d.State.rep[2]
  152. }
  153. d.State.rep[2] = d.State.rep[1]
  154. }
  155. d.State.rep[1] = d.State.rep[0]
  156. d.State.rep[0] = dist
  157. }
  158. n, err := d.State.repLenCodec.Decode(d.rd, posState)
  159. if err != nil {
  160. return nil, err
  161. }
  162. d.State.updateStateRep()
  163. op = match{n: int(n) + minMatchLen, distance: int64(dist) + minDistance}
  164. return op, nil
  165. }
  166. // apply takes the operation and transforms the decoder dictionary accordingly.
  167. func (d *decoder) apply(op operation) error {
  168. var err error
  169. switch x := op.(type) {
  170. case match:
  171. err = d.Dict.writeMatch(x.distance, x.n)
  172. case lit:
  173. err = d.Dict.WriteByte(x.b)
  174. default:
  175. panic("op is neither a match nor a literal")
  176. }
  177. return err
  178. }
  179. // decompress fills the dictionary unless no space for new data is
  180. // available. If the end of the LZMA stream has been reached io.EOF will
  181. // be returned.
  182. func (d *decoder) decompress() error {
  183. if d.eos {
  184. return io.EOF
  185. }
  186. for d.Dict.Available() >= maxMatchLen {
  187. op, err := d.readOp()
  188. switch err {
  189. case nil:
  190. // break
  191. case errEOS:
  192. d.eos = true
  193. if !d.rd.possiblyAtEnd() {
  194. return errDataAfterEOS
  195. }
  196. if d.size >= 0 && d.size != d.Decompressed() {
  197. return errSize
  198. }
  199. return io.EOF
  200. case io.EOF:
  201. d.eos = true
  202. return io.ErrUnexpectedEOF
  203. default:
  204. return err
  205. }
  206. if err = d.apply(op); err != nil {
  207. return err
  208. }
  209. if d.size >= 0 && d.Decompressed() >= d.size {
  210. d.eos = true
  211. if d.Decompressed() > d.size {
  212. return errSize
  213. }
  214. if !d.rd.possiblyAtEnd() {
  215. switch _, err = d.readOp(); err {
  216. case nil:
  217. return errSize
  218. case io.EOF:
  219. return io.ErrUnexpectedEOF
  220. case errEOS:
  221. break
  222. default:
  223. return err
  224. }
  225. }
  226. return io.EOF
  227. }
  228. }
  229. return nil
  230. }
  231. // Errors that may be returned while decoding data.
  232. var (
  233. errDataAfterEOS = errors.New("lzma: data after end of stream marker")
  234. errSize = errors.New("lzma: wrong uncompressed data size")
  235. )
  236. // Read reads data from the buffer. If no more data is available io.EOF is
  237. // returned.
  238. func (d *decoder) Read(p []byte) (n int, err error) {
  239. var k int
  240. for {
  241. // Read of decoder dict never returns an error.
  242. k, err = d.Dict.Read(p[n:])
  243. if err != nil {
  244. panic(fmt.Errorf("dictionary read error %s", err))
  245. }
  246. if k == 0 && d.eos {
  247. return n, io.EOF
  248. }
  249. n += k
  250. if n >= len(p) {
  251. return n, nil
  252. }
  253. if err = d.decompress(); err != nil && err != io.EOF {
  254. return n, err
  255. }
  256. }
  257. }
  258. // Decompressed returns the number of bytes decompressed by the decoder.
  259. func (d *decoder) Decompressed() int64 {
  260. return d.Dict.pos() - d.start
  261. }