| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398 |
- // Copyright 2014-2022 Ulrich Kunitz. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package lzma
- import (
- "errors"
- "fmt"
- "io"
- )
// Size limits for the payload of a single chunk.
const (
	maxCompressed   = 64 << 10 // 64 KiB: maximum size of compressed data in a chunk
	maxUncompressed = 2 << 20  // 2 MiB: maximum size of uncompressed data in a chunk
)
// chunkType identifies the kind of an LZMA2 chunk. The values are an
// internal representation only; they are not the encoding used in the
// chunk header byte.
type chunkType byte

// The chunk types, numbered consecutively starting at zero.
const (
	cEOS   chunkType = iota // end of stream
	cUD                     // uncompressed; reset dictionary
	cU                      // uncompressed; no reset of dictionary
	cL                      // LZMA compressed; no reset
	cLR                     // LZMA compressed; reset state
	cLRN                    // LZMA compressed; reset state; new property value
	cLRND                   // LZMA compressed; reset state; new property value; reset dictionary
)
- // chunkTypeStrings provide a string representation for the chunk types.
- var chunkTypeStrings = [...]string{
- cEOS: "EOS",
- cU: "U",
- cUD: "UD",
- cL: "L",
- cLR: "LR",
- cLRN: "LRN",
- cLRND: "LRND",
- }
- // String returns a string representation of the chunk type.
- func (c chunkType) String() string {
- if !(cEOS <= c && c <= cLRND) {
- return "unknown"
- }
- return chunkTypeStrings[c]
- }
// Encodings of the chunk types as they appear in the chunk header byte.
// For LZMA chunks the header byte additionally stores the high bits of the
// uncompressed size in its remaining low bits.
const (
	hEOS  = 0x00 // end of stream
	hUD   = 0x01 // uncompressed; reset dictionary
	hU    = 0x02 // uncompressed; no reset of dictionary
	hL    = 0x80 // LZMA flag: bit 7
	hLR   = 0xa0 // bits 7 and 5
	hLRN  = 0xc0 // bits 7 and 6
	hLRND = 0xe0 // bits 7, 6 and 5
)
- // errHeaderByte indicates an unsupported value for the chunk header
- // byte. These bytes starts the variable-length chunk header.
- var errHeaderByte = errors.New("lzma: unsupported chunk header byte")
- // headerChunkType converts the header byte into a chunk type. It
- // ignores the uncompressed size bits in the chunk header byte.
- func headerChunkType(h byte) (c chunkType, err error) {
- if h&hL == 0 {
- // no compression
- switch h {
- case hEOS:
- c = cEOS
- case hUD:
- c = cUD
- case hU:
- c = cU
- default:
- return 0, errHeaderByte
- }
- return
- }
- switch h & hLRND {
- case hL:
- c = cL
- case hLR:
- c = cLR
- case hLRN:
- c = cLRN
- case hLRND:
- c = cLRND
- default:
- return 0, errHeaderByte
- }
- return
- }
- // uncompressedHeaderLen provides the length of an uncompressed header
- const uncompressedHeaderLen = 3
- // headerLen returns the length of the LZMA2 header for a given chunk
- // type.
- func headerLen(c chunkType) int {
- switch c {
- case cEOS:
- return 1
- case cU, cUD:
- return uncompressedHeaderLen
- case cL, cLR:
- return 5
- case cLRN, cLRND:
- return 6
- }
- panic(fmt.Errorf("unsupported chunk type %d", c))
- }
- // chunkHeader represents the contents of a chunk header.
- type chunkHeader struct {
- ctype chunkType
- uncompressed uint32
- compressed uint16
- props Properties
- }
- // String returns a string representation of the chunk header.
- func (h *chunkHeader) String() string {
- return fmt.Sprintf("%s %d %d %s", h.ctype, h.uncompressed,
- h.compressed, &h.props)
- }
- // UnmarshalBinary reads the content of the chunk header from the data
- // slice. The slice must have the correct length.
- func (h *chunkHeader) UnmarshalBinary(data []byte) error {
- if len(data) == 0 {
- return errors.New("no data")
- }
- c, err := headerChunkType(data[0])
- if err != nil {
- return err
- }
- n := headerLen(c)
- if len(data) < n {
- return errors.New("incomplete data")
- }
- if len(data) > n {
- return errors.New("invalid data length")
- }
- *h = chunkHeader{ctype: c}
- if c == cEOS {
- return nil
- }
- h.uncompressed = uint32(uint16BE(data[1:3]))
- if c <= cU {
- return nil
- }
- h.uncompressed |= uint32(data[0]&^hLRND) << 16
- h.compressed = uint16BE(data[3:5])
- if c <= cLR {
- return nil
- }
- h.props, err = PropertiesForCode(data[5])
- return err
- }
- // MarshalBinary encodes the chunk header value. The function checks
- // whether the content of the chunk header is correct.
- func (h *chunkHeader) MarshalBinary() (data []byte, err error) {
- if h.ctype > cLRND {
- return nil, errors.New("invalid chunk type")
- }
- if err = h.props.verify(); err != nil {
- return nil, err
- }
- data = make([]byte, headerLen(h.ctype))
- switch h.ctype {
- case cEOS:
- return data, nil
- case cUD:
- data[0] = hUD
- case cU:
- data[0] = hU
- case cL:
- data[0] = hL
- case cLR:
- data[0] = hLR
- case cLRN:
- data[0] = hLRN
- case cLRND:
- data[0] = hLRND
- }
- putUint16BE(data[1:3], uint16(h.uncompressed))
- if h.ctype <= cU {
- return data, nil
- }
- data[0] |= byte(h.uncompressed>>16) &^ hLRND
- putUint16BE(data[3:5], h.compressed)
- if h.ctype <= cLR {
- return data, nil
- }
- data[5] = h.props.Code()
- return data, nil
- }
- // readChunkHeader reads the chunk header from the IO reader.
- func readChunkHeader(r io.Reader) (h *chunkHeader, err error) {
- p := make([]byte, 1, 6)
- if _, err = io.ReadFull(r, p); err != nil {
- return
- }
- c, err := headerChunkType(p[0])
- if err != nil {
- return
- }
- p = p[:headerLen(c)]
- if _, err = io.ReadFull(r, p[1:]); err != nil {
- return
- }
- h = new(chunkHeader)
- if err = h.UnmarshalBinary(p); err != nil {
- return nil, err
- }
- return h, nil
- }
// uint16BE decodes the first two bytes of p as a big-endian uint16 value.
// It panics if p has fewer than two bytes.
func uint16BE(p []byte) uint16 {
	hi := uint16(p[0])
	lo := uint16(p[1])
	return hi<<8 | lo
}
// putUint16BE stores x in big-endian byte order in the first two bytes of
// p. It panics if p has fewer than two bytes.
func putUint16BE(p []byte, x uint16) {
	hi, lo := byte(x>>8), byte(x)
	p[0] = hi
	p[1] = lo
}
// chunkState is the state that is tracked while a sequence of chunks is
// processed.
type chunkState byte

// The initial and the terminating chunk state.
const (
	start chunkState = 'S' // initial state
	stop  chunkState = 'T' // terminating state
)
// Errors reported by the chunk state handling.
var (
	// errChunkType reports a chunk type that is not permitted in the
	// current chunk state.
	errChunkType = errors.New("lzma: unexpected chunk type")
	// errState reports a chunk state value that is not a known state.
	errState = errors.New("lzma: wrong chunk state")
)
- // next transitions state based on chunk type input
- func (c *chunkState) next(ctype chunkType) error {
- switch *c {
- // start state
- case 'S':
- switch ctype {
- case cEOS:
- *c = 'T'
- case cUD:
- *c = 'R'
- case cLRND:
- *c = 'L'
- default:
- return errChunkType
- }
- // normal LZMA mode
- case 'L':
- switch ctype {
- case cEOS:
- *c = 'T'
- case cUD:
- *c = 'R'
- case cU:
- *c = 'U'
- case cL, cLR, cLRN, cLRND:
- break
- default:
- return errChunkType
- }
- // reset required
- case 'R':
- switch ctype {
- case cEOS:
- *c = 'T'
- case cUD, cU:
- break
- case cLRN, cLRND:
- *c = 'L'
- default:
- return errChunkType
- }
- // uncompressed
- case 'U':
- switch ctype {
- case cEOS:
- *c = 'T'
- case cUD:
- *c = 'R'
- case cU:
- break
- case cL, cLR, cLRN, cLRND:
- *c = 'L'
- default:
- return errChunkType
- }
- // terminal state
- case 'T':
- return errChunkType
- default:
- return errState
- }
- return nil
- }
- // defaultChunkType returns the default chunk type for each chunk state.
- func (c chunkState) defaultChunkType() chunkType {
- switch c {
- case 'S':
- return cLRND
- case 'L', 'U':
- return cL
- case 'R':
- return cLRN
- default:
- // no error
- return cEOS
- }
- }
// Limits of the LZMA2 dictionary capacity encoding.
const (
	// maxDictCap is the maximum dictionary capacity supported by the
	// encoding.
	maxDictCap = 0xffffffff
	// maxDictCapCode is the maximum dictionary capacity code.
	maxDictCapCode = 40
)
// decodeDictCap decodes the dictionary capacity byte. It doesn't check
// whether the byte is in the valid range. The lowest bit of c selects the
// factor 2 or 3, and the next five bits are added to 11 to form the shift
// count.
func decodeDictCap(c byte) int64 {
	mantissa := 2 | int64(c&1)
	exponent := 11 + (c>>1)&0x1f
	return mantissa << exponent
}
- // DecodeDictCap decodes the encoded dictionary capacity. The function
- // returns an error if the code is out of range.
- func DecodeDictCap(c byte) (n int64, err error) {
- if c >= maxDictCapCode {
- if c == maxDictCapCode {
- return maxDictCap, nil
- }
- return 0, errors.New("lzma: invalid dictionary size code")
- }
- return decodeDictCap(c), nil
- }
- // EncodeDictCap encodes a dictionary capacity. The function returns the
- // code for the capacity that is greater or equal n. If n exceeds the
- // maximum support dictionary capacity, the maximum value is returned.
- func EncodeDictCap(n int64) byte {
- a, b := byte(0), byte(40)
- for a < b {
- c := a + (b-a)>>1
- m := decodeDictCap(c)
- if n <= m {
- if n == m {
- return c
- }
- b = c
- } else {
- a = c + 1
- }
- }
- return a
- }
|