| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359 |
- // Copyright 2014-2022 Ulrich Kunitz. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- // Package xz supports the compression and decompression of xz files. It
- // supports version 1.0.4 of the specification without the non-LZMA2
- // filters. See http://tukaani.org/xz/xz-file-format-1.0.4.txt
- package xz
- import (
- "bytes"
- "errors"
- "fmt"
- "hash"
- "io"
- "github.com/ulikunitz/xz/internal/xlog"
- "github.com/ulikunitz/xz/lzma"
- )
// ReaderConfig collects the parameters that control how an xz reader
// is set up.
type ReaderConfig struct {
	// DictCap is the dictionary capacity; Verify passes it to the
	// lzma reader configuration for validation.
	DictCap int
	// SingleStream requests the reader to assume that the underlying
	// input contains exactly one xz stream.
	SingleStream bool
}
- // Verify checks the reader parameters for Validity. Zero values will be
- // replaced by default values.
- func (c *ReaderConfig) Verify() error {
- if c == nil {
- return errors.New("xz: reader parameters are nil")
- }
- lc := lzma.Reader2Config{DictCap: c.DictCap}
- if err := lc.Verify(); err != nil {
- return err
- }
- return nil
- }
- // Reader supports the reading of one or multiple xz streams.
- type Reader struct {
- ReaderConfig
- xz io.Reader
- sr *streamReader
- }
- // streamReader decodes a single xz stream
- type streamReader struct {
- ReaderConfig
- xz io.Reader
- br *blockReader
- newHash func() hash.Hash
- h header
- index []record
- }
- // NewReader creates a new xz reader using the default parameters.
- // The function reads and checks the header of the first XZ stream. The
- // reader will process multiple streams including padding.
- func NewReader(xz io.Reader) (r *Reader, err error) {
- return ReaderConfig{}.NewReader(xz)
- }
- // NewReader creates an xz stream reader. The created reader will be
- // able to process multiple streams and padding unless a SingleStream
- // has been set in the reader configuration c.
- func (c ReaderConfig) NewReader(xz io.Reader) (r *Reader, err error) {
- if err = c.Verify(); err != nil {
- return nil, err
- }
- r = &Reader{
- ReaderConfig: c,
- xz: xz,
- }
- if r.sr, err = c.newStreamReader(xz); err != nil {
- if err == io.EOF {
- err = io.ErrUnexpectedEOF
- }
- return nil, err
- }
- return r, nil
- }
- var errUnexpectedData = errors.New("xz: unexpected data after stream")
- // Read reads uncompressed data from the stream.
- func (r *Reader) Read(p []byte) (n int, err error) {
- for n < len(p) {
- if r.sr == nil {
- if r.SingleStream {
- data := make([]byte, 1)
- _, err = io.ReadFull(r.xz, data)
- if err != io.EOF {
- return n, errUnexpectedData
- }
- return n, io.EOF
- }
- for {
- r.sr, err = r.ReaderConfig.newStreamReader(r.xz)
- if err != errPadding {
- break
- }
- }
- if err != nil {
- return n, err
- }
- }
- k, err := r.sr.Read(p[n:])
- n += k
- if err != nil {
- if err == io.EOF {
- r.sr = nil
- continue
- }
- return n, err
- }
- }
- return n, nil
- }
- var errPadding = errors.New("xz: padding (4 zero bytes) encountered")
- // newStreamReader creates a new xz stream reader using the given configuration
- // parameters. NewReader reads and checks the header of the xz stream.
- func (c ReaderConfig) newStreamReader(xz io.Reader) (r *streamReader, err error) {
- if err = c.Verify(); err != nil {
- return nil, err
- }
- data := make([]byte, HeaderLen)
- if _, err := io.ReadFull(xz, data[:4]); err != nil {
- return nil, err
- }
- if bytes.Equal(data[:4], []byte{0, 0, 0, 0}) {
- return nil, errPadding
- }
- if _, err = io.ReadFull(xz, data[4:]); err != nil {
- if err == io.EOF {
- err = io.ErrUnexpectedEOF
- }
- return nil, err
- }
- r = &streamReader{
- ReaderConfig: c,
- xz: xz,
- index: make([]record, 0, 4),
- }
- if err = r.h.UnmarshalBinary(data); err != nil {
- return nil, err
- }
- xlog.Debugf("xz header %s", r.h)
- if r.newHash, err = newHashFunc(r.h.flags); err != nil {
- return nil, err
- }
- return r, nil
- }
- // readTail reads the index body and the xz footer.
- func (r *streamReader) readTail() error {
- index, n, err := readIndexBody(r.xz, len(r.index))
- if err != nil {
- if err == io.EOF {
- err = io.ErrUnexpectedEOF
- }
- return err
- }
- for i, rec := range r.index {
- if rec != index[i] {
- return fmt.Errorf("xz: record %d is %v; want %v",
- i, rec, index[i])
- }
- }
- p := make([]byte, footerLen)
- if _, err = io.ReadFull(r.xz, p); err != nil {
- if err == io.EOF {
- err = io.ErrUnexpectedEOF
- }
- return err
- }
- var f footer
- if err = f.UnmarshalBinary(p); err != nil {
- return err
- }
- xlog.Debugf("xz footer %s", f)
- if f.flags != r.h.flags {
- return errors.New("xz: footer flags incorrect")
- }
- if f.indexSize != int64(n)+1 {
- return errors.New("xz: index size in footer wrong")
- }
- return nil
- }
- // Read reads actual data from the xz stream.
- func (r *streamReader) Read(p []byte) (n int, err error) {
- for n < len(p) {
- if r.br == nil {
- bh, hlen, err := readBlockHeader(r.xz)
- if err != nil {
- if err == errIndexIndicator {
- if err = r.readTail(); err != nil {
- return n, err
- }
- return n, io.EOF
- }
- return n, err
- }
- xlog.Debugf("block %v", *bh)
- r.br, err = r.ReaderConfig.newBlockReader(r.xz, bh,
- hlen, r.newHash())
- if err != nil {
- return n, err
- }
- }
- k, err := r.br.Read(p[n:])
- n += k
- if err != nil {
- if err == io.EOF {
- r.index = append(r.index, r.br.record())
- r.br = nil
- } else {
- return n, err
- }
- }
- }
- return n, nil
- }
// countingReader wraps an io.Reader and keeps a running total of the
// bytes that have been read through it.
type countingReader struct {
	// r is the wrapped reader.
	r io.Reader
	// n is the total number of bytes returned by r so far.
	n int64
}

// Read forwards the call to the wrapped reader and adds the number of
// bytes it returned to the n field. The result of the wrapped reader
// is passed on unchanged.
func (lr *countingReader) Read(p []byte) (n int, err error) {
	got, readErr := lr.r.Read(p)
	lr.n += int64(got)
	return got, readErr
}
- // blockReader supports the reading of a block.
- type blockReader struct {
- lxz countingReader
- header *blockHeader
- headerLen int
- n int64
- hash hash.Hash
- r io.Reader
- }
- // newBlockReader creates a new block reader.
- func (c *ReaderConfig) newBlockReader(xz io.Reader, h *blockHeader,
- hlen int, hash hash.Hash) (br *blockReader, err error) {
- br = &blockReader{
- lxz: countingReader{r: xz},
- header: h,
- headerLen: hlen,
- hash: hash,
- }
- fr, err := c.newFilterReader(&br.lxz, h.filters)
- if err != nil {
- return nil, err
- }
- if br.hash.Size() != 0 {
- br.r = io.TeeReader(fr, br.hash)
- } else {
- br.r = fr
- }
- return br, nil
- }
- // uncompressedSize returns the uncompressed size of the block.
- func (br *blockReader) uncompressedSize() int64 {
- return br.n
- }
- // compressedSize returns the compressed size of the block.
- func (br *blockReader) compressedSize() int64 {
- return br.lxz.n
- }
- // unpaddedSize computes the unpadded size for the block.
- func (br *blockReader) unpaddedSize() int64 {
- n := int64(br.headerLen)
- n += br.compressedSize()
- n += int64(br.hash.Size())
- return n
- }
- // record returns the index record for the current block.
- func (br *blockReader) record() record {
- return record{br.unpaddedSize(), br.uncompressedSize()}
- }
- // Read reads data from the block.
- func (br *blockReader) Read(p []byte) (n int, err error) {
- n, err = br.r.Read(p)
- br.n += int64(n)
- u := br.header.uncompressedSize
- if u >= 0 && br.uncompressedSize() > u {
- return n, errors.New("xz: wrong uncompressed size for block")
- }
- c := br.header.compressedSize
- if c >= 0 && br.compressedSize() > c {
- return n, errors.New("xz: wrong compressed size for block")
- }
- if err != io.EOF {
- return n, err
- }
- if br.uncompressedSize() < u || br.compressedSize() < c {
- return n, io.ErrUnexpectedEOF
- }
- s := br.hash.Size()
- k := padLen(br.lxz.n)
- q := make([]byte, k+s, k+2*s)
- if _, err = io.ReadFull(br.lxz.r, q); err != nil {
- if err == io.EOF {
- err = io.ErrUnexpectedEOF
- }
- return n, err
- }
- if !allZeros(q[:k]) {
- return n, errors.New("xz: non-zero block padding")
- }
- checkSum := q[k:]
- computedSum := br.hash.Sum(checkSum[s:])
- if !bytes.Equal(checkSum, computedSum) {
- return n, errors.New("xz: checksum error for block")
- }
- return n, io.EOF
- }
- func (c *ReaderConfig) newFilterReader(r io.Reader, f []filter) (fr io.Reader,
- err error) {
- if err = verifyFilters(f); err != nil {
- return nil, err
- }
- fr = r
- for i := len(f) - 1; i >= 0; i-- {
- fr, err = f[i].reader(fr, c)
- if err != nil {
- return nil, err
- }
- }
- return fr, nil
- }
|