header2.go 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. // Copyright 2014-2022 Ulrich Kunitz. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package lzma
  5. import (
  6. "errors"
  7. "fmt"
  8. "io"
  9. )
// Size limits for the payload of a single chunk.
const (
	// maximum size of compressed data in a chunk (1<<16 = 65536)
	maxCompressed = 1 << 16
	// maximum size of uncompressed data in a chunk (1<<21 = 2097152)
	maxUncompressed = 1 << 21
)
// chunkType represents the type of an LZMA2 chunk. Note that this
// value is an internal representation and not the actual encoding of an
// LZMA2 chunk header.
type chunkType byte
// Possible values for the chunk type.
//
// The order of the values matters: the header marshalling code in this
// file uses comparisons such as c <= cU and c <= cLR to decide which
// header fields are present.
const (
	// end of stream
	cEOS chunkType = iota
	// uncompressed; reset dictionary
	cUD
	// uncompressed; no reset of dictionary
	cU
	// LZMA compressed; no reset
	cL
	// LZMA compressed; reset state
	cLR
	// LZMA compressed; reset state; new property value
	cLRN
	// LZMA compressed; reset state; new property value; reset dictionary
	cLRND
)
// chunkTypeStrings provides a string representation for the chunk
// types. The array is indexed by the chunkType value.
var chunkTypeStrings = [...]string{
	cEOS:  "EOS",
	cU:    "U",
	cUD:   "UD",
	cL:    "L",
	cLR:   "LR",
	cLRN:  "LRN",
	cLRND: "LRND",
}
  47. // String returns a string representation of the chunk type.
  48. func (c chunkType) String() string {
  49. if !(cEOS <= c && c <= cLRND) {
  50. return "unknown"
  51. }
  52. return chunkTypeStrings[c]
  53. }
// Actual encodings for the chunk types in the header byte. Note that
// for LZMA chunks the high uncompressed size bits are additionally
// stored in the low five bits of the header byte.
//
// Bit 7 (hL) marks an LZMA-compressed chunk; bits 6 and 5 select the
// LR, LRN and LRND variants.
const (
	hEOS  = 0
	hUD   = 1
	hU    = 2
	hL    = 1 << 7
	hLR   = 1<<7 | 1<<5
	hLRN  = 1<<7 | 1<<6
	hLRND = 1<<7 | 1<<6 | 1<<5
)
// errHeaderByte indicates an unsupported value for the chunk header
// byte. This byte starts the variable-length chunk header.
var errHeaderByte = errors.New("lzma: unsupported chunk header byte")
  68. // headerChunkType converts the header byte into a chunk type. It
  69. // ignores the uncompressed size bits in the chunk header byte.
  70. func headerChunkType(h byte) (c chunkType, err error) {
  71. if h&hL == 0 {
  72. // no compression
  73. switch h {
  74. case hEOS:
  75. c = cEOS
  76. case hUD:
  77. c = cUD
  78. case hU:
  79. c = cU
  80. default:
  81. return 0, errHeaderByte
  82. }
  83. return
  84. }
  85. switch h & hLRND {
  86. case hL:
  87. c = cL
  88. case hLR:
  89. c = cLR
  90. case hLRN:
  91. c = cLRN
  92. case hLRND:
  93. c = cLRND
  94. default:
  95. return 0, errHeaderByte
  96. }
  97. return
  98. }
// uncompressedHeaderLen provides the length in bytes of the header of
// an uncompressed chunk.
const uncompressedHeaderLen = 3
  101. // headerLen returns the length of the LZMA2 header for a given chunk
  102. // type.
  103. func headerLen(c chunkType) int {
  104. switch c {
  105. case cEOS:
  106. return 1
  107. case cU, cUD:
  108. return uncompressedHeaderLen
  109. case cL, cLR:
  110. return 5
  111. case cLRN, cLRND:
  112. return 6
  113. }
  114. panic(fmt.Errorf("unsupported chunk type %d", c))
  115. }
// chunkHeader represents the contents of a chunk header.
type chunkHeader struct {
	// ctype is the chunk type; it determines which of the other fields
	// are present in the encoded header.
	ctype chunkType
	// uncompressed is the uncompressed size field exactly as stored in
	// the header: 16 bits for uncompressed chunks, 21 bits for LZMA
	// chunks. NOTE(review): presumably the actual size minus one, as in
	// the LZMA2 format -- confirm against the callers.
	uncompressed uint32
	// compressed is the 16-bit compressed size field; it is only encoded
	// for LZMA chunks.
	compressed uint16
	// props holds the properties; they are only encoded for the cLRN and
	// cLRND chunk types.
	props Properties
}
  123. // String returns a string representation of the chunk header.
  124. func (h *chunkHeader) String() string {
  125. return fmt.Sprintf("%s %d %d %s", h.ctype, h.uncompressed,
  126. h.compressed, &h.props)
  127. }
  128. // UnmarshalBinary reads the content of the chunk header from the data
  129. // slice. The slice must have the correct length.
  130. func (h *chunkHeader) UnmarshalBinary(data []byte) error {
  131. if len(data) == 0 {
  132. return errors.New("no data")
  133. }
  134. c, err := headerChunkType(data[0])
  135. if err != nil {
  136. return err
  137. }
  138. n := headerLen(c)
  139. if len(data) < n {
  140. return errors.New("incomplete data")
  141. }
  142. if len(data) > n {
  143. return errors.New("invalid data length")
  144. }
  145. *h = chunkHeader{ctype: c}
  146. if c == cEOS {
  147. return nil
  148. }
  149. h.uncompressed = uint32(uint16BE(data[1:3]))
  150. if c <= cU {
  151. return nil
  152. }
  153. h.uncompressed |= uint32(data[0]&^hLRND) << 16
  154. h.compressed = uint16BE(data[3:5])
  155. if c <= cLR {
  156. return nil
  157. }
  158. h.props, err = PropertiesForCode(data[5])
  159. return err
  160. }
  161. // MarshalBinary encodes the chunk header value. The function checks
  162. // whether the content of the chunk header is correct.
  163. func (h *chunkHeader) MarshalBinary() (data []byte, err error) {
  164. if h.ctype > cLRND {
  165. return nil, errors.New("invalid chunk type")
  166. }
  167. if err = h.props.verify(); err != nil {
  168. return nil, err
  169. }
  170. data = make([]byte, headerLen(h.ctype))
  171. switch h.ctype {
  172. case cEOS:
  173. return data, nil
  174. case cUD:
  175. data[0] = hUD
  176. case cU:
  177. data[0] = hU
  178. case cL:
  179. data[0] = hL
  180. case cLR:
  181. data[0] = hLR
  182. case cLRN:
  183. data[0] = hLRN
  184. case cLRND:
  185. data[0] = hLRND
  186. }
  187. putUint16BE(data[1:3], uint16(h.uncompressed))
  188. if h.ctype <= cU {
  189. return data, nil
  190. }
  191. data[0] |= byte(h.uncompressed>>16) &^ hLRND
  192. putUint16BE(data[3:5], h.compressed)
  193. if h.ctype <= cLR {
  194. return data, nil
  195. }
  196. data[5] = h.props.Code()
  197. return data, nil
  198. }
  199. // readChunkHeader reads the chunk header from the IO reader.
  200. func readChunkHeader(r io.Reader) (h *chunkHeader, err error) {
  201. p := make([]byte, 1, 6)
  202. if _, err = io.ReadFull(r, p); err != nil {
  203. return
  204. }
  205. c, err := headerChunkType(p[0])
  206. if err != nil {
  207. return
  208. }
  209. p = p[:headerLen(c)]
  210. if _, err = io.ReadFull(r, p[1:]); err != nil {
  211. return
  212. }
  213. h = new(chunkHeader)
  214. if err = h.UnmarshalBinary(p); err != nil {
  215. return nil, err
  216. }
  217. return h, nil
  218. }
  219. // uint16BE converts a big-endian uint16 representation to an uint16
  220. // value.
  221. func uint16BE(p []byte) uint16 {
  222. return uint16(p[0])<<8 | uint16(p[1])
  223. }
  224. // putUint16BE puts the big-endian uint16 presentation into the given
  225. // slice.
  226. func putUint16BE(p []byte, x uint16) {
  227. p[0] = byte(x >> 8)
  228. p[1] = byte(x)
  229. }
// chunkState is used to manage the state of the chunks. The states
// handled by the next method are 'S' (start), 'L' (normal LZMA mode),
// 'R' (reset required), 'U' (uncompressed) and 'T' (terminal).
type chunkState byte
// start and stop define the initial and terminating state of the chunk
// state machine.
const (
	start chunkState = 'S'
	stop  chunkState = 'T'
)
// errors for the chunk state handling
var (
	// errChunkType reports a chunk type that is not allowed in the
	// current chunk state.
	errChunkType = errors.New("lzma: unexpected chunk type")
	// errState reports a chunk state value that is not part of the state
	// machine.
	errState = errors.New("lzma: wrong chunk state")
)
  243. // next transitions state based on chunk type input
  244. func (c *chunkState) next(ctype chunkType) error {
  245. switch *c {
  246. // start state
  247. case 'S':
  248. switch ctype {
  249. case cEOS:
  250. *c = 'T'
  251. case cUD:
  252. *c = 'R'
  253. case cLRND:
  254. *c = 'L'
  255. default:
  256. return errChunkType
  257. }
  258. // normal LZMA mode
  259. case 'L':
  260. switch ctype {
  261. case cEOS:
  262. *c = 'T'
  263. case cUD:
  264. *c = 'R'
  265. case cU:
  266. *c = 'U'
  267. case cL, cLR, cLRN, cLRND:
  268. break
  269. default:
  270. return errChunkType
  271. }
  272. // reset required
  273. case 'R':
  274. switch ctype {
  275. case cEOS:
  276. *c = 'T'
  277. case cUD, cU:
  278. break
  279. case cLRN, cLRND:
  280. *c = 'L'
  281. default:
  282. return errChunkType
  283. }
  284. // uncompressed
  285. case 'U':
  286. switch ctype {
  287. case cEOS:
  288. *c = 'T'
  289. case cUD:
  290. *c = 'R'
  291. case cU:
  292. break
  293. case cL, cLR, cLRN, cLRND:
  294. *c = 'L'
  295. default:
  296. return errChunkType
  297. }
  298. // terminal state
  299. case 'T':
  300. return errChunkType
  301. default:
  302. return errState
  303. }
  304. return nil
  305. }
  306. // defaultChunkType returns the default chunk type for each chunk state.
  307. func (c chunkState) defaultChunkType() chunkType {
  308. switch c {
  309. case 'S':
  310. return cLRND
  311. case 'L', 'U':
  312. return cL
  313. case 'R':
  314. return cLRN
  315. default:
  316. // no error
  317. return cEOS
  318. }
  319. }
// maxDictCap defines the maximum dictionary capacity supported by the
// LZMA2 dictionary capacity encoding.
const maxDictCap = 1<<32 - 1

// maxDictCapCode defines the maximum dictionary capacity code.
// DecodeDictCap maps this code to maxDictCap.
const maxDictCapCode = 40
  325. // The function decodes the dictionary capacity byte, but doesn't change
  326. // for the correct range of the given byte.
  327. func decodeDictCap(c byte) int64 {
  328. return (2 | int64(c)&1) << (11 + (c>>1)&0x1f)
  329. }
  330. // DecodeDictCap decodes the encoded dictionary capacity. The function
  331. // returns an error if the code is out of range.
  332. func DecodeDictCap(c byte) (n int64, err error) {
  333. if c >= maxDictCapCode {
  334. if c == maxDictCapCode {
  335. return maxDictCap, nil
  336. }
  337. return 0, errors.New("lzma: invalid dictionary size code")
  338. }
  339. return decodeDictCap(c), nil
  340. }
  341. // EncodeDictCap encodes a dictionary capacity. The function returns the
  342. // code for the capacity that is greater or equal n. If n exceeds the
  343. // maximum support dictionary capacity, the maximum value is returned.
  344. func EncodeDictCap(n int64) byte {
  345. a, b := byte(0), byte(40)
  346. for a < b {
  347. c := a + (b-a)>>1
  348. m := decodeDictCap(c)
  349. if n <= m {
  350. if n == m {
  351. return c
  352. }
  353. b = c
  354. } else {
  355. a = c + 1
  356. }
  357. }
  358. return a
  359. }