writer.go 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. /*
  2. * Copyright 2011-2012 Branimir Karadzic. All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without modification,
  5. * are permitted provided that the following conditions are met:
  6. *
  7. * 1. Redistributions of source code must retain the above copyright notice, this
  8. * list of conditions and the following disclaimer.
  9. *
  10. * 2. Redistributions in binary form must reproduce the above copyright notice,
  11. * this list of conditions and the following disclaimer in the documentation
  12. * and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
  15. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  16. * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
  17. * SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  19. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  20. * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  21. * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  22. * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  23. * THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. package lz4
  26. import (
  27. "errors"
  28. "sync"
  29. )
  30. const (
  31. minMatch = 4
  32. hashLog = 16
  33. hashTableSize = 1 << hashLog
  34. hashShift = (minMatch * 8) - hashLog
  35. incompressible uint32 = 128
  36. uninitHash = 0x88888888
  37. mfLimit = 8 + minMatch // The last match cannot start within the last 12 bytes.
  38. // MaxInputSize is the largest buffer than can be compressed in a single block
  39. MaxInputSize = 0x7E000000
  40. )
  41. var (
  42. // ErrTooLarge indicates the input buffer was too large
  43. ErrTooLarge = errors.New("input too large")
  44. ErrEncodeTooSmall = errors.New("encode buffer too small")
  45. hashPool = sync.Pool{
  46. New: func() interface{} {
  47. return make([]uint32, hashTableSize)
  48. },
  49. }
  50. )
  51. type encoder struct {
  52. src []byte
  53. dst []byte
  54. hashTable []uint32
  55. pos uint32
  56. anchor uint32
  57. dpos uint32
  58. }
  59. // CompressBound returns the maximum length of a lz4 block
  60. func CompressBound(isize int) int {
  61. if isize > MaxInputSize {
  62. return 0
  63. }
  64. return isize + ((isize) / 255) + 16
  65. }
  66. func (e *encoder) writeLiterals(length, mlLen, pos uint32) {
  67. ln := length
  68. var code byte
  69. if ln > runMask-1 {
  70. code = runMask
  71. } else {
  72. code = byte(ln)
  73. }
  74. if mlLen > mlMask-1 {
  75. e.dst[e.dpos] = (code << mlBits) + byte(mlMask)
  76. } else {
  77. e.dst[e.dpos] = (code << mlBits) + byte(mlLen)
  78. }
  79. e.dpos++
  80. if code == runMask {
  81. ln -= runMask
  82. for ; ln > 254; ln -= 255 {
  83. e.dst[e.dpos] = 255
  84. e.dpos++
  85. }
  86. e.dst[e.dpos] = byte(ln)
  87. e.dpos++
  88. }
  89. for ii := uint32(0); ii < length; ii++ {
  90. e.dst[e.dpos+ii] = e.src[pos+ii]
  91. }
  92. e.dpos += length
  93. }
  94. // Encode returns the encoded form of src. The returned array may be a
  95. // sub-slice of dst if it was large enough to hold the entire output.
  96. func Encode(dst, src []byte) (compressedSize int, error error) {
  97. if len(src) >= MaxInputSize {
  98. return 0, ErrTooLarge
  99. }
  100. if n := CompressBound(len(src)); len(dst) < n {
  101. return 0, ErrEncodeTooSmall
  102. }
  103. hashTable := hashPool.Get().([]uint32)
  104. for i := range hashTable {
  105. hashTable[i] = 0
  106. }
  107. e := encoder{src: src, dst: dst, hashTable: hashTable}
  108. defer func() {
  109. hashPool.Put(hashTable)
  110. }()
  111. // binary.LittleEndian.PutUint32(dst, uint32(len(src)))
  112. // e.dpos = 0
  113. var (
  114. step uint32 = 1
  115. limit = incompressible
  116. )
  117. for {
  118. if int(e.pos)+12 >= len(e.src) {
  119. e.writeLiterals(uint32(len(e.src))-e.anchor, 0, e.anchor)
  120. return int(e.dpos), nil
  121. }
  122. sequence := uint32(e.src[e.pos+3])<<24 | uint32(e.src[e.pos+2])<<16 | uint32(e.src[e.pos+1])<<8 | uint32(e.src[e.pos+0])
  123. hash := (sequence * 2654435761) >> hashShift
  124. ref := e.hashTable[hash] + uninitHash
  125. e.hashTable[hash] = e.pos - uninitHash
  126. if ((e.pos-ref)>>16) != 0 || uint32(e.src[ref+3])<<24|uint32(e.src[ref+2])<<16|uint32(e.src[ref+1])<<8|uint32(e.src[ref+0]) != sequence {
  127. if e.pos-e.anchor > limit {
  128. limit <<= 1
  129. step += 1 + (step >> 2)
  130. }
  131. e.pos += step
  132. continue
  133. }
  134. if step > 1 {
  135. e.hashTable[hash] = ref - uninitHash
  136. e.pos -= step - 1
  137. step = 1
  138. continue
  139. }
  140. limit = incompressible
  141. ln := e.pos - e.anchor
  142. back := e.pos - ref
  143. anchor := e.anchor
  144. e.pos += minMatch
  145. ref += minMatch
  146. e.anchor = e.pos
  147. for int(e.pos) < len(e.src)-5 && e.src[e.pos] == e.src[ref] {
  148. e.pos++
  149. ref++
  150. }
  151. mlLen := e.pos - e.anchor
  152. e.writeLiterals(ln, mlLen, anchor)
  153. e.dst[e.dpos] = uint8(back)
  154. e.dst[e.dpos+1] = uint8(back >> 8)
  155. e.dpos += 2
  156. if mlLen > mlMask-1 {
  157. mlLen -= mlMask
  158. for mlLen > 254 {
  159. mlLen -= 255
  160. e.dst[e.dpos] = 255
  161. e.dpos++
  162. }
  163. e.dst[e.dpos] = byte(mlLen)
  164. e.dpos++
  165. }
  166. e.anchor = e.pos
  167. }
  168. }