encoder.go 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. // Copyright 2014-2022 Ulrich Kunitz. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package lzma
  5. import (
  6. "fmt"
  7. "io"
  8. )
  9. // opLenMargin provides the upper limit of the number of bytes required
  10. // to encode a single operation.
  11. const opLenMargin = 16
  12. // compressFlags control the compression process.
  13. type compressFlags uint32
  14. // Values for compressFlags.
  15. const (
  16. // all data should be compressed, even if compression is not
  17. // optimal.
  18. all compressFlags = 1 << iota
  19. )
  20. // encoderFlags provide the flags for an encoder.
  21. type encoderFlags uint32
  22. // Flags for the encoder.
  23. const (
  24. // eosMarker requests an EOS marker to be written.
  25. eosMarker encoderFlags = 1 << iota
  26. )
  27. // Encoder compresses data buffered in the encoder dictionary and writes
  28. // it into a byte writer.
  29. type encoder struct {
  30. dict *encoderDict
  31. state *state
  32. re *rangeEncoder
  33. start int64
  34. // generate eos marker
  35. marker bool
  36. limit bool
  37. margin int
  38. }
  39. // newEncoder creates a new encoder. If the byte writer must be
  40. // limited use LimitedByteWriter provided by this package. The flags
  41. // argument supports the eosMarker flag, controlling whether a
  42. // terminating end-of-stream marker must be written.
  43. func newEncoder(bw io.ByteWriter, state *state, dict *encoderDict,
  44. flags encoderFlags) (e *encoder, err error) {
  45. re, err := newRangeEncoder(bw)
  46. if err != nil {
  47. return nil, err
  48. }
  49. e = &encoder{
  50. dict: dict,
  51. state: state,
  52. re: re,
  53. marker: flags&eosMarker != 0,
  54. start: dict.Pos(),
  55. margin: opLenMargin,
  56. }
  57. if e.marker {
  58. e.margin += 5
  59. }
  60. return e, nil
  61. }
  62. // Write writes the bytes from p into the dictionary. If not enough
  63. // space is available the data in the dictionary buffer will be
  64. // compressed to make additional space available. If the limit of the
  65. // underlying writer has been reached ErrLimit will be returned.
  66. func (e *encoder) Write(p []byte) (n int, err error) {
  67. for {
  68. k, err := e.dict.Write(p[n:])
  69. n += k
  70. if err == ErrNoSpace {
  71. if err = e.compress(0); err != nil {
  72. return n, err
  73. }
  74. continue
  75. }
  76. return n, err
  77. }
  78. }
  79. // Reopen reopens the encoder with a new byte writer.
  80. func (e *encoder) Reopen(bw io.ByteWriter) error {
  81. var err error
  82. if e.re, err = newRangeEncoder(bw); err != nil {
  83. return err
  84. }
  85. e.start = e.dict.Pos()
  86. e.limit = false
  87. return nil
  88. }
  89. // writeLiteral writes a literal into the LZMA stream
  90. func (e *encoder) writeLiteral(l lit) error {
  91. var err error
  92. state, state2, _ := e.state.states(e.dict.Pos())
  93. if err = e.state.isMatch[state2].Encode(e.re, 0); err != nil {
  94. return err
  95. }
  96. litState := e.state.litState(e.dict.ByteAt(1), e.dict.Pos())
  97. match := e.dict.ByteAt(int(e.state.rep[0]) + 1)
  98. err = e.state.litCodec.Encode(e.re, l.b, state, match, litState)
  99. if err != nil {
  100. return err
  101. }
  102. e.state.updateStateLiteral()
  103. return nil
  104. }
  105. // iverson implements the Iverson operator as proposed by Donald Knuth in his
  106. // book Concrete Mathematics.
  107. func iverson(ok bool) uint32 {
  108. if ok {
  109. return 1
  110. }
  111. return 0
  112. }
  113. // writeMatch writes a repetition operation into the operation stream
  114. func (e *encoder) writeMatch(m match) error {
  115. var err error
  116. if !(minDistance <= m.distance && m.distance <= maxDistance) {
  117. panic(fmt.Errorf("match distance %d out of range", m.distance))
  118. }
  119. dist := uint32(m.distance - minDistance)
  120. if !(minMatchLen <= m.n && m.n <= maxMatchLen) &&
  121. !(dist == e.state.rep[0] && m.n == 1) {
  122. panic(fmt.Errorf(
  123. "match length %d out of range; dist %d rep[0] %d",
  124. m.n, dist, e.state.rep[0]))
  125. }
  126. state, state2, posState := e.state.states(e.dict.Pos())
  127. if err = e.state.isMatch[state2].Encode(e.re, 1); err != nil {
  128. return err
  129. }
  130. g := 0
  131. for ; g < 4; g++ {
  132. if e.state.rep[g] == dist {
  133. break
  134. }
  135. }
  136. b := iverson(g < 4)
  137. if err = e.state.isRep[state].Encode(e.re, b); err != nil {
  138. return err
  139. }
  140. n := uint32(m.n - minMatchLen)
  141. if b == 0 {
  142. // simple match
  143. e.state.rep[3], e.state.rep[2], e.state.rep[1], e.state.rep[0] =
  144. e.state.rep[2], e.state.rep[1], e.state.rep[0], dist
  145. e.state.updateStateMatch()
  146. if err = e.state.lenCodec.Encode(e.re, n, posState); err != nil {
  147. return err
  148. }
  149. return e.state.distCodec.Encode(e.re, dist, n)
  150. }
  151. b = iverson(g != 0)
  152. if err = e.state.isRepG0[state].Encode(e.re, b); err != nil {
  153. return err
  154. }
  155. if b == 0 {
  156. // g == 0
  157. b = iverson(m.n != 1)
  158. if err = e.state.isRepG0Long[state2].Encode(e.re, b); err != nil {
  159. return err
  160. }
  161. if b == 0 {
  162. e.state.updateStateShortRep()
  163. return nil
  164. }
  165. } else {
  166. // g in {1,2,3}
  167. b = iverson(g != 1)
  168. if err = e.state.isRepG1[state].Encode(e.re, b); err != nil {
  169. return err
  170. }
  171. if b == 1 {
  172. // g in {2,3}
  173. b = iverson(g != 2)
  174. err = e.state.isRepG2[state].Encode(e.re, b)
  175. if err != nil {
  176. return err
  177. }
  178. if b == 1 {
  179. e.state.rep[3] = e.state.rep[2]
  180. }
  181. e.state.rep[2] = e.state.rep[1]
  182. }
  183. e.state.rep[1] = e.state.rep[0]
  184. e.state.rep[0] = dist
  185. }
  186. e.state.updateStateRep()
  187. return e.state.repLenCodec.Encode(e.re, n, posState)
  188. }
  189. // writeOp writes a single operation to the range encoder. The function
  190. // checks whether there is enough space available to close the LZMA
  191. // stream.
  192. func (e *encoder) writeOp(op operation) error {
  193. if e.re.Available() < int64(e.margin) {
  194. return ErrLimit
  195. }
  196. switch x := op.(type) {
  197. case lit:
  198. return e.writeLiteral(x)
  199. case match:
  200. return e.writeMatch(x)
  201. default:
  202. panic("unexpected operation")
  203. }
  204. }
  205. // compress compressed data from the dictionary buffer. If the flag all
  206. // is set, all data in the dictionary buffer will be compressed. The
  207. // function returns ErrLimit if the underlying writer has reached its
  208. // limit.
  209. func (e *encoder) compress(flags compressFlags) error {
  210. n := 0
  211. if flags&all == 0 {
  212. n = maxMatchLen - 1
  213. }
  214. d := e.dict
  215. m := d.m
  216. for d.Buffered() > n {
  217. op := m.NextOp(e.state.rep)
  218. if err := e.writeOp(op); err != nil {
  219. return err
  220. }
  221. d.Discard(op.Len())
  222. }
  223. return nil
  224. }
  225. // eosMatch is a pseudo operation that indicates the end of the stream.
  226. var eosMatch = match{distance: maxDistance, n: minMatchLen}
  227. // Close terminates the LZMA stream. If requested the end-of-stream
  228. // marker will be written. If the byte writer limit has been or will be
  229. // reached during compression of the remaining data in the buffer the
  230. // LZMA stream will be closed and data will remain in the buffer.
  231. func (e *encoder) Close() error {
  232. err := e.compress(all)
  233. if err != nil && err != ErrLimit {
  234. return err
  235. }
  236. if e.marker {
  237. if err := e.writeMatch(eosMatch); err != nil {
  238. return err
  239. }
  240. }
  241. err = e.re.Close()
  242. return err
  243. }
  244. // Compressed returns the number bytes of the input data that been
  245. // compressed.
  246. func (e *encoder) Compressed() int64 {
  247. return e.dict.Pos() - e.start
  248. }