| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268 |
- // Copyright 2014-2022 Ulrich Kunitz. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package lzma
- import (
- "fmt"
- "io"
- )
- // opLenMargin provides the upper limit of the number of bytes required
- // to encode a single operation.
- const opLenMargin = 16
- // compressFlags control the compression process.
- type compressFlags uint32
- // Values for compressFlags.
- const (
- // all data should be compressed, even if compression is not
- // optimal.
- all compressFlags = 1 << iota
- )
- // encoderFlags provide the flags for an encoder.
- type encoderFlags uint32
- // Flags for the encoder.
- const (
- // eosMarker requests an EOS marker to be written.
- eosMarker encoderFlags = 1 << iota
- )
- // Encoder compresses data buffered in the encoder dictionary and writes
- // it into a byte writer.
- type encoder struct {
- dict *encoderDict
- state *state
- re *rangeEncoder
- start int64
- // generate eos marker
- marker bool
- limit bool
- margin int
- }
- // newEncoder creates a new encoder. If the byte writer must be
- // limited use LimitedByteWriter provided by this package. The flags
- // argument supports the eosMarker flag, controlling whether a
- // terminating end-of-stream marker must be written.
- func newEncoder(bw io.ByteWriter, state *state, dict *encoderDict,
- flags encoderFlags) (e *encoder, err error) {
- re, err := newRangeEncoder(bw)
- if err != nil {
- return nil, err
- }
- e = &encoder{
- dict: dict,
- state: state,
- re: re,
- marker: flags&eosMarker != 0,
- start: dict.Pos(),
- margin: opLenMargin,
- }
- if e.marker {
- e.margin += 5
- }
- return e, nil
- }
- // Write writes the bytes from p into the dictionary. If not enough
- // space is available the data in the dictionary buffer will be
- // compressed to make additional space available. If the limit of the
- // underlying writer has been reached ErrLimit will be returned.
- func (e *encoder) Write(p []byte) (n int, err error) {
- for {
- k, err := e.dict.Write(p[n:])
- n += k
- if err == ErrNoSpace {
- if err = e.compress(0); err != nil {
- return n, err
- }
- continue
- }
- return n, err
- }
- }
- // Reopen reopens the encoder with a new byte writer.
- func (e *encoder) Reopen(bw io.ByteWriter) error {
- var err error
- if e.re, err = newRangeEncoder(bw); err != nil {
- return err
- }
- e.start = e.dict.Pos()
- e.limit = false
- return nil
- }
- // writeLiteral writes a literal into the LZMA stream
- func (e *encoder) writeLiteral(l lit) error {
- var err error
- state, state2, _ := e.state.states(e.dict.Pos())
- if err = e.state.isMatch[state2].Encode(e.re, 0); err != nil {
- return err
- }
- litState := e.state.litState(e.dict.ByteAt(1), e.dict.Pos())
- match := e.dict.ByteAt(int(e.state.rep[0]) + 1)
- err = e.state.litCodec.Encode(e.re, l.b, state, match, litState)
- if err != nil {
- return err
- }
- e.state.updateStateLiteral()
- return nil
- }
- // iverson implements the Iverson operator as proposed by Donald Knuth in his
- // book Concrete Mathematics.
- func iverson(ok bool) uint32 {
- if ok {
- return 1
- }
- return 0
- }
- // writeMatch writes a repetition operation into the operation stream
- func (e *encoder) writeMatch(m match) error {
- var err error
- if !(minDistance <= m.distance && m.distance <= maxDistance) {
- panic(fmt.Errorf("match distance %d out of range", m.distance))
- }
- dist := uint32(m.distance - minDistance)
- if !(minMatchLen <= m.n && m.n <= maxMatchLen) &&
- !(dist == e.state.rep[0] && m.n == 1) {
- panic(fmt.Errorf(
- "match length %d out of range; dist %d rep[0] %d",
- m.n, dist, e.state.rep[0]))
- }
- state, state2, posState := e.state.states(e.dict.Pos())
- if err = e.state.isMatch[state2].Encode(e.re, 1); err != nil {
- return err
- }
- g := 0
- for ; g < 4; g++ {
- if e.state.rep[g] == dist {
- break
- }
- }
- b := iverson(g < 4)
- if err = e.state.isRep[state].Encode(e.re, b); err != nil {
- return err
- }
- n := uint32(m.n - minMatchLen)
- if b == 0 {
- // simple match
- e.state.rep[3], e.state.rep[2], e.state.rep[1], e.state.rep[0] =
- e.state.rep[2], e.state.rep[1], e.state.rep[0], dist
- e.state.updateStateMatch()
- if err = e.state.lenCodec.Encode(e.re, n, posState); err != nil {
- return err
- }
- return e.state.distCodec.Encode(e.re, dist, n)
- }
- b = iverson(g != 0)
- if err = e.state.isRepG0[state].Encode(e.re, b); err != nil {
- return err
- }
- if b == 0 {
- // g == 0
- b = iverson(m.n != 1)
- if err = e.state.isRepG0Long[state2].Encode(e.re, b); err != nil {
- return err
- }
- if b == 0 {
- e.state.updateStateShortRep()
- return nil
- }
- } else {
- // g in {1,2,3}
- b = iverson(g != 1)
- if err = e.state.isRepG1[state].Encode(e.re, b); err != nil {
- return err
- }
- if b == 1 {
- // g in {2,3}
- b = iverson(g != 2)
- err = e.state.isRepG2[state].Encode(e.re, b)
- if err != nil {
- return err
- }
- if b == 1 {
- e.state.rep[3] = e.state.rep[2]
- }
- e.state.rep[2] = e.state.rep[1]
- }
- e.state.rep[1] = e.state.rep[0]
- e.state.rep[0] = dist
- }
- e.state.updateStateRep()
- return e.state.repLenCodec.Encode(e.re, n, posState)
- }
- // writeOp writes a single operation to the range encoder. The function
- // checks whether there is enough space available to close the LZMA
- // stream.
- func (e *encoder) writeOp(op operation) error {
- if e.re.Available() < int64(e.margin) {
- return ErrLimit
- }
- switch x := op.(type) {
- case lit:
- return e.writeLiteral(x)
- case match:
- return e.writeMatch(x)
- default:
- panic("unexpected operation")
- }
- }
- // compress compressed data from the dictionary buffer. If the flag all
- // is set, all data in the dictionary buffer will be compressed. The
- // function returns ErrLimit if the underlying writer has reached its
- // limit.
- func (e *encoder) compress(flags compressFlags) error {
- n := 0
- if flags&all == 0 {
- n = maxMatchLen - 1
- }
- d := e.dict
- m := d.m
- for d.Buffered() > n {
- op := m.NextOp(e.state.rep)
- if err := e.writeOp(op); err != nil {
- return err
- }
- d.Discard(op.Len())
- }
- return nil
- }
- // eosMatch is a pseudo operation that indicates the end of the stream.
- var eosMatch = match{distance: maxDistance, n: minMatchLen}
- // Close terminates the LZMA stream. If requested the end-of-stream
- // marker will be written. If the byte writer limit has been or will be
- // reached during compression of the remaining data in the buffer the
- // LZMA stream will be closed and data will remain in the buffer.
- func (e *encoder) Close() error {
- err := e.compress(all)
- if err != nil && err != ErrLimit {
- return err
- }
- if e.marker {
- if err := e.writeMatch(eosMatch); err != nil {
- return err
- }
- }
- err = e.re.Close()
- return err
- }
- // Compressed returns the number bytes of the input data that been
- // compressed.
- func (e *encoder) Compressed() int64 {
- return e.dict.Pos() - e.start
- }
|