| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277 |
- // Copyright 2014-2022 Ulrich Kunitz. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package lzma
- import (
- "errors"
- "fmt"
- "io"
- )
- // decoder decodes a raw LZMA stream without any header.
- type decoder struct {
- // dictionary; the rear pointer of the buffer will be used for
- // reading the data.
- Dict *decoderDict
- // decoder state
- State *state
- // range decoder
- rd *rangeDecoder
- // start stores the head value of the dictionary for the LZMA
- // stream
- start int64
- // size of uncompressed data
- size int64
- // end-of-stream encountered
- eos bool
- // EOS marker found
- eosMarker bool
- }
- // newDecoder creates a new decoder instance. The parameter size provides
- // the expected byte size of the decompressed data. If the size is
- // unknown use a negative value. In that case the decoder will look for
- // a terminating end-of-stream marker.
- func newDecoder(br io.ByteReader, state *state, dict *decoderDict, size int64) (d *decoder, err error) {
- rd, err := newRangeDecoder(br)
- if err != nil {
- return nil, err
- }
- d = &decoder{
- State: state,
- Dict: dict,
- rd: rd,
- size: size,
- start: dict.pos(),
- }
- return d, nil
- }
- // Reopen restarts the decoder with a new byte reader and a new size. Reopen
- // resets the Decompressed counter to zero.
- func (d *decoder) Reopen(br io.ByteReader, size int64) error {
- var err error
- if d.rd, err = newRangeDecoder(br); err != nil {
- return err
- }
- d.start = d.Dict.pos()
- d.size = size
- d.eos = false
- return nil
- }
- // decodeLiteral decodes a single literal from the LZMA stream.
- func (d *decoder) decodeLiteral() (op operation, err error) {
- litState := d.State.litState(d.Dict.byteAt(1), d.Dict.head)
- match := d.Dict.byteAt(int(d.State.rep[0]) + 1)
- s, err := d.State.litCodec.Decode(d.rd, d.State.state, match, litState)
- if err != nil {
- return nil, err
- }
- return lit{s}, nil
- }
// errEOS is the sentinel error reported when the end-of-stream (EOS)
// marker has been decoded from the compressed data.
var errEOS = errors.New("EOS marker found")
- // readOp decodes the next operation from the compressed stream. It
- // returns the operation. If an explicit end of stream marker is
- // identified the eos error is returned.
- func (d *decoder) readOp() (op operation, err error) {
- // Value of the end of stream (EOS) marker
- const eosDist = 1<<32 - 1
- state, state2, posState := d.State.states(d.Dict.head)
- b, err := d.State.isMatch[state2].Decode(d.rd)
- if err != nil {
- return nil, err
- }
- if b == 0 {
- // literal
- op, err := d.decodeLiteral()
- if err != nil {
- return nil, err
- }
- d.State.updateStateLiteral()
- return op, nil
- }
- b, err = d.State.isRep[state].Decode(d.rd)
- if err != nil {
- return nil, err
- }
- if b == 0 {
- // simple match
- d.State.rep[3], d.State.rep[2], d.State.rep[1] =
- d.State.rep[2], d.State.rep[1], d.State.rep[0]
- d.State.updateStateMatch()
- // The length decoder returns the length offset.
- n, err := d.State.lenCodec.Decode(d.rd, posState)
- if err != nil {
- return nil, err
- }
- // The dist decoder returns the distance offset. The actual
- // distance is 1 higher.
- d.State.rep[0], err = d.State.distCodec.Decode(d.rd, n)
- if err != nil {
- return nil, err
- }
- if d.State.rep[0] == eosDist {
- d.eosMarker = true
- return nil, errEOS
- }
- op = match{n: int(n) + minMatchLen,
- distance: int64(d.State.rep[0]) + minDistance}
- return op, nil
- }
- b, err = d.State.isRepG0[state].Decode(d.rd)
- if err != nil {
- return nil, err
- }
- dist := d.State.rep[0]
- if b == 0 {
- // rep match 0
- b, err = d.State.isRepG0Long[state2].Decode(d.rd)
- if err != nil {
- return nil, err
- }
- if b == 0 {
- d.State.updateStateShortRep()
- op = match{n: 1, distance: int64(dist) + minDistance}
- return op, nil
- }
- } else {
- b, err = d.State.isRepG1[state].Decode(d.rd)
- if err != nil {
- return nil, err
- }
- if b == 0 {
- dist = d.State.rep[1]
- } else {
- b, err = d.State.isRepG2[state].Decode(d.rd)
- if err != nil {
- return nil, err
- }
- if b == 0 {
- dist = d.State.rep[2]
- } else {
- dist = d.State.rep[3]
- d.State.rep[3] = d.State.rep[2]
- }
- d.State.rep[2] = d.State.rep[1]
- }
- d.State.rep[1] = d.State.rep[0]
- d.State.rep[0] = dist
- }
- n, err := d.State.repLenCodec.Decode(d.rd, posState)
- if err != nil {
- return nil, err
- }
- d.State.updateStateRep()
- op = match{n: int(n) + minMatchLen, distance: int64(dist) + minDistance}
- return op, nil
- }
- // apply takes the operation and transforms the decoder dictionary accordingly.
- func (d *decoder) apply(op operation) error {
- var err error
- switch x := op.(type) {
- case match:
- err = d.Dict.writeMatch(x.distance, x.n)
- case lit:
- err = d.Dict.WriteByte(x.b)
- default:
- panic("op is neither a match nor a literal")
- }
- return err
- }
- // decompress fills the dictionary unless no space for new data is
- // available. If the end of the LZMA stream has been reached io.EOF will
- // be returned.
- func (d *decoder) decompress() error {
- if d.eos {
- return io.EOF
- }
- for d.Dict.Available() >= maxMatchLen {
- op, err := d.readOp()
- switch err {
- case nil:
- // break
- case errEOS:
- d.eos = true
- if !d.rd.possiblyAtEnd() {
- return errDataAfterEOS
- }
- if d.size >= 0 && d.size != d.Decompressed() {
- return errSize
- }
- return io.EOF
- case io.EOF:
- d.eos = true
- return io.ErrUnexpectedEOF
- default:
- return err
- }
- if err = d.apply(op); err != nil {
- return err
- }
- if d.size >= 0 && d.Decompressed() >= d.size {
- d.eos = true
- if d.Decompressed() > d.size {
- return errSize
- }
- if !d.rd.possiblyAtEnd() {
- switch _, err = d.readOp(); err {
- case nil:
- return errSize
- case io.EOF:
- return io.ErrUnexpectedEOF
- case errEOS:
- break
- default:
- return err
- }
- }
- return io.EOF
- }
- }
- return nil
- }
// errDataAfterEOS is returned while decoding when the compressed input
// continues after the end-of-stream marker.
var errDataAfterEOS = errors.New("lzma: data after end of stream marker")

// errSize is returned while decoding when the amount of uncompressed
// data does not match the expected size.
var errSize = errors.New("lzma: wrong uncompressed data size")
- // Read reads data from the buffer. If no more data is available io.EOF is
- // returned.
- func (d *decoder) Read(p []byte) (n int, err error) {
- var k int
- for {
- // Read of decoder dict never returns an error.
- k, err = d.Dict.Read(p[n:])
- if err != nil {
- panic(fmt.Errorf("dictionary read error %s", err))
- }
- if k == 0 && d.eos {
- return n, io.EOF
- }
- n += k
- if n >= len(p) {
- return n, nil
- }
- if err = d.decompress(); err != nil && err != io.EOF {
- return n, err
- }
- }
- }
- // Decompressed returns the number of bytes decompressed by the decoder.
- func (d *decoder) Decompressed() int64 {
- return d.Dict.pos() - d.start
- }
|