reader.go 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. // Copyright 2014-2022 Ulrich Kunitz. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package xz supports the compression and decompression of xz files. It
  5. // supports version 1.0.4 of the specification without the non-LZMA2
  6. // filters. See http://tukaani.org/xz/xz-file-format-1.0.4.txt
  7. package xz
  8. import (
  9. "bytes"
  10. "errors"
  11. "fmt"
  12. "hash"
  13. "io"
  14. "github.com/ulikunitz/xz/internal/xlog"
  15. "github.com/ulikunitz/xz/lzma"
  16. )
  17. // ReaderConfig defines the parameters for the xz reader. The
  18. // SingleStream parameter requests the reader to assume that the
  19. // underlying stream contains only a single stream.
  20. type ReaderConfig struct {
  21. DictCap int
  22. SingleStream bool
  23. }
  24. // Verify checks the reader parameters for Validity. Zero values will be
  25. // replaced by default values.
  26. func (c *ReaderConfig) Verify() error {
  27. if c == nil {
  28. return errors.New("xz: reader parameters are nil")
  29. }
  30. lc := lzma.Reader2Config{DictCap: c.DictCap}
  31. if err := lc.Verify(); err != nil {
  32. return err
  33. }
  34. return nil
  35. }
  36. // Reader supports the reading of one or multiple xz streams.
  37. type Reader struct {
  38. ReaderConfig
  39. xz io.Reader
  40. sr *streamReader
  41. }
  42. // streamReader decodes a single xz stream
  43. type streamReader struct {
  44. ReaderConfig
  45. xz io.Reader
  46. br *blockReader
  47. newHash func() hash.Hash
  48. h header
  49. index []record
  50. }
  51. // NewReader creates a new xz reader using the default parameters.
  52. // The function reads and checks the header of the first XZ stream. The
  53. // reader will process multiple streams including padding.
  54. func NewReader(xz io.Reader) (r *Reader, err error) {
  55. return ReaderConfig{}.NewReader(xz)
  56. }
  57. // NewReader creates an xz stream reader. The created reader will be
  58. // able to process multiple streams and padding unless a SingleStream
  59. // has been set in the reader configuration c.
  60. func (c ReaderConfig) NewReader(xz io.Reader) (r *Reader, err error) {
  61. if err = c.Verify(); err != nil {
  62. return nil, err
  63. }
  64. r = &Reader{
  65. ReaderConfig: c,
  66. xz: xz,
  67. }
  68. if r.sr, err = c.newStreamReader(xz); err != nil {
  69. if err == io.EOF {
  70. err = io.ErrUnexpectedEOF
  71. }
  72. return nil, err
  73. }
  74. return r, nil
  75. }
  76. var errUnexpectedData = errors.New("xz: unexpected data after stream")
  77. // Read reads uncompressed data from the stream.
  78. func (r *Reader) Read(p []byte) (n int, err error) {
  79. for n < len(p) {
  80. if r.sr == nil {
  81. if r.SingleStream {
  82. data := make([]byte, 1)
  83. _, err = io.ReadFull(r.xz, data)
  84. if err != io.EOF {
  85. return n, errUnexpectedData
  86. }
  87. return n, io.EOF
  88. }
  89. for {
  90. r.sr, err = r.ReaderConfig.newStreamReader(r.xz)
  91. if err != errPadding {
  92. break
  93. }
  94. }
  95. if err != nil {
  96. return n, err
  97. }
  98. }
  99. k, err := r.sr.Read(p[n:])
  100. n += k
  101. if err != nil {
  102. if err == io.EOF {
  103. r.sr = nil
  104. continue
  105. }
  106. return n, err
  107. }
  108. }
  109. return n, nil
  110. }
  111. var errPadding = errors.New("xz: padding (4 zero bytes) encountered")
  112. // newStreamReader creates a new xz stream reader using the given configuration
  113. // parameters. NewReader reads and checks the header of the xz stream.
  114. func (c ReaderConfig) newStreamReader(xz io.Reader) (r *streamReader, err error) {
  115. if err = c.Verify(); err != nil {
  116. return nil, err
  117. }
  118. data := make([]byte, HeaderLen)
  119. if _, err := io.ReadFull(xz, data[:4]); err != nil {
  120. return nil, err
  121. }
  122. if bytes.Equal(data[:4], []byte{0, 0, 0, 0}) {
  123. return nil, errPadding
  124. }
  125. if _, err = io.ReadFull(xz, data[4:]); err != nil {
  126. if err == io.EOF {
  127. err = io.ErrUnexpectedEOF
  128. }
  129. return nil, err
  130. }
  131. r = &streamReader{
  132. ReaderConfig: c,
  133. xz: xz,
  134. index: make([]record, 0, 4),
  135. }
  136. if err = r.h.UnmarshalBinary(data); err != nil {
  137. return nil, err
  138. }
  139. xlog.Debugf("xz header %s", r.h)
  140. if r.newHash, err = newHashFunc(r.h.flags); err != nil {
  141. return nil, err
  142. }
  143. return r, nil
  144. }
  145. // readTail reads the index body and the xz footer.
  146. func (r *streamReader) readTail() error {
  147. index, n, err := readIndexBody(r.xz, len(r.index))
  148. if err != nil {
  149. if err == io.EOF {
  150. err = io.ErrUnexpectedEOF
  151. }
  152. return err
  153. }
  154. for i, rec := range r.index {
  155. if rec != index[i] {
  156. return fmt.Errorf("xz: record %d is %v; want %v",
  157. i, rec, index[i])
  158. }
  159. }
  160. p := make([]byte, footerLen)
  161. if _, err = io.ReadFull(r.xz, p); err != nil {
  162. if err == io.EOF {
  163. err = io.ErrUnexpectedEOF
  164. }
  165. return err
  166. }
  167. var f footer
  168. if err = f.UnmarshalBinary(p); err != nil {
  169. return err
  170. }
  171. xlog.Debugf("xz footer %s", f)
  172. if f.flags != r.h.flags {
  173. return errors.New("xz: footer flags incorrect")
  174. }
  175. if f.indexSize != int64(n)+1 {
  176. return errors.New("xz: index size in footer wrong")
  177. }
  178. return nil
  179. }
  180. // Read reads actual data from the xz stream.
  181. func (r *streamReader) Read(p []byte) (n int, err error) {
  182. for n < len(p) {
  183. if r.br == nil {
  184. bh, hlen, err := readBlockHeader(r.xz)
  185. if err != nil {
  186. if err == errIndexIndicator {
  187. if err = r.readTail(); err != nil {
  188. return n, err
  189. }
  190. return n, io.EOF
  191. }
  192. return n, err
  193. }
  194. xlog.Debugf("block %v", *bh)
  195. r.br, err = r.ReaderConfig.newBlockReader(r.xz, bh,
  196. hlen, r.newHash())
  197. if err != nil {
  198. return n, err
  199. }
  200. }
  201. k, err := r.br.Read(p[n:])
  202. n += k
  203. if err != nil {
  204. if err == io.EOF {
  205. r.index = append(r.index, r.br.record())
  206. r.br = nil
  207. } else {
  208. return n, err
  209. }
  210. }
  211. }
  212. return n, nil
  213. }
  214. // countingReader is a reader that counts the bytes read.
  215. type countingReader struct {
  216. r io.Reader
  217. n int64
  218. }
  219. // Read reads data from the wrapped reader and adds it to the n field.
  220. func (lr *countingReader) Read(p []byte) (n int, err error) {
  221. n, err = lr.r.Read(p)
  222. lr.n += int64(n)
  223. return n, err
  224. }
  225. // blockReader supports the reading of a block.
  226. type blockReader struct {
  227. lxz countingReader
  228. header *blockHeader
  229. headerLen int
  230. n int64
  231. hash hash.Hash
  232. r io.Reader
  233. }
  234. // newBlockReader creates a new block reader.
  235. func (c *ReaderConfig) newBlockReader(xz io.Reader, h *blockHeader,
  236. hlen int, hash hash.Hash) (br *blockReader, err error) {
  237. br = &blockReader{
  238. lxz: countingReader{r: xz},
  239. header: h,
  240. headerLen: hlen,
  241. hash: hash,
  242. }
  243. fr, err := c.newFilterReader(&br.lxz, h.filters)
  244. if err != nil {
  245. return nil, err
  246. }
  247. if br.hash.Size() != 0 {
  248. br.r = io.TeeReader(fr, br.hash)
  249. } else {
  250. br.r = fr
  251. }
  252. return br, nil
  253. }
  254. // uncompressedSize returns the uncompressed size of the block.
  255. func (br *blockReader) uncompressedSize() int64 {
  256. return br.n
  257. }
  258. // compressedSize returns the compressed size of the block.
  259. func (br *blockReader) compressedSize() int64 {
  260. return br.lxz.n
  261. }
  262. // unpaddedSize computes the unpadded size for the block.
  263. func (br *blockReader) unpaddedSize() int64 {
  264. n := int64(br.headerLen)
  265. n += br.compressedSize()
  266. n += int64(br.hash.Size())
  267. return n
  268. }
  269. // record returns the index record for the current block.
  270. func (br *blockReader) record() record {
  271. return record{br.unpaddedSize(), br.uncompressedSize()}
  272. }
  273. // Read reads data from the block.
  274. func (br *blockReader) Read(p []byte) (n int, err error) {
  275. n, err = br.r.Read(p)
  276. br.n += int64(n)
  277. u := br.header.uncompressedSize
  278. if u >= 0 && br.uncompressedSize() > u {
  279. return n, errors.New("xz: wrong uncompressed size for block")
  280. }
  281. c := br.header.compressedSize
  282. if c >= 0 && br.compressedSize() > c {
  283. return n, errors.New("xz: wrong compressed size for block")
  284. }
  285. if err != io.EOF {
  286. return n, err
  287. }
  288. if br.uncompressedSize() < u || br.compressedSize() < c {
  289. return n, io.ErrUnexpectedEOF
  290. }
  291. s := br.hash.Size()
  292. k := padLen(br.lxz.n)
  293. q := make([]byte, k+s, k+2*s)
  294. if _, err = io.ReadFull(br.lxz.r, q); err != nil {
  295. if err == io.EOF {
  296. err = io.ErrUnexpectedEOF
  297. }
  298. return n, err
  299. }
  300. if !allZeros(q[:k]) {
  301. return n, errors.New("xz: non-zero block padding")
  302. }
  303. checkSum := q[k:]
  304. computedSum := br.hash.Sum(checkSum[s:])
  305. if !bytes.Equal(checkSum, computedSum) {
  306. return n, errors.New("xz: checksum error for block")
  307. }
  308. return n, io.EOF
  309. }
  310. func (c *ReaderConfig) newFilterReader(r io.Reader, f []filter) (fr io.Reader,
  311. err error) {
  312. if err = verifyFilters(f); err != nil {
  313. return nil, err
  314. }
  315. fr = r
  316. for i := len(f) - 1; i >= 0; i-- {
  317. fr, err = f[i].reader(fr, c)
  318. if err != nil {
  319. return nil, err
  320. }
  321. }
  322. return fr, nil
  323. }