bintree.go 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522
  1. // Copyright 2014-2022 Ulrich Kunitz. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package lzma
  5. import (
  6. "errors"
  7. "unicode"
  8. )
  9. // node represents a node in the binary tree.
  10. type node struct {
  11. // x is the search value
  12. x uint32
  13. // p parent node
  14. p uint32
  15. // l left child
  16. l uint32
  17. // r right child
  18. r uint32
  19. }
  20. // wordLen is the number of bytes represented by the v field of a node.
  21. const wordLen = 4
  22. // binTree supports the identification of the next operation based on a
  23. // binary tree.
  24. //
  25. // Nodes will be identified by their index into the ring buffer.
  26. type binTree struct {
  27. dict *encoderDict
  28. // ring buffer of nodes
  29. node []node
  30. // absolute offset of the entry for the next node. Position 4
  31. // byte larger.
  32. hoff int64
  33. // front position in the node ring buffer
  34. front uint32
  35. // index of the root node
  36. root uint32
  37. // current x value
  38. x uint32
  39. // preallocated array
  40. data []byte
  41. }
  42. // null represents the nonexistent index. We can't use zero because it
  43. // would always exist or we would need to decrease the index for each
  44. // reference.
  45. const null uint32 = 1<<32 - 1
  46. // newBinTree initializes the binTree structure. The capacity defines
  47. // the size of the buffer and defines the maximum distance for which
  48. // matches will be found.
  49. func newBinTree(capacity int) (t *binTree, err error) {
  50. if capacity < 1 {
  51. return nil, errors.New(
  52. "newBinTree: capacity must be larger than zero")
  53. }
  54. if int64(capacity) >= int64(null) {
  55. return nil, errors.New(
  56. "newBinTree: capacity must less 2^{32}-1")
  57. }
  58. t = &binTree{
  59. node: make([]node, capacity),
  60. hoff: -int64(wordLen),
  61. root: null,
  62. data: make([]byte, maxMatchLen),
  63. }
  64. return t, nil
  65. }
  66. func (t *binTree) SetDict(d *encoderDict) { t.dict = d }
  67. // WriteByte writes a single byte into the binary tree.
  68. func (t *binTree) WriteByte(c byte) error {
  69. t.x = (t.x << 8) | uint32(c)
  70. t.hoff++
  71. if t.hoff < 0 {
  72. return nil
  73. }
  74. v := t.front
  75. if int64(v) < t.hoff {
  76. // We are overwriting old nodes stored in the tree.
  77. t.remove(v)
  78. }
  79. t.node[v].x = t.x
  80. t.add(v)
  81. t.front++
  82. if int64(t.front) >= int64(len(t.node)) {
  83. t.front = 0
  84. }
  85. return nil
  86. }
  87. // Writes writes a sequence of bytes into the binTree structure.
  88. func (t *binTree) Write(p []byte) (n int, err error) {
  89. for _, c := range p {
  90. t.WriteByte(c)
  91. }
  92. return len(p), nil
  93. }
  94. // add puts the node v into the tree. The node must not be part of the
  95. // tree before.
  96. func (t *binTree) add(v uint32) {
  97. vn := &t.node[v]
  98. // Set left and right to null indices.
  99. vn.l, vn.r = null, null
  100. // If the binary tree is empty make v the root.
  101. if t.root == null {
  102. t.root = v
  103. vn.p = null
  104. return
  105. }
  106. x := vn.x
  107. p := t.root
  108. // Search for the right leave link and add the new node.
  109. for {
  110. pn := &t.node[p]
  111. if x <= pn.x {
  112. if pn.l == null {
  113. pn.l = v
  114. vn.p = p
  115. return
  116. }
  117. p = pn.l
  118. } else {
  119. if pn.r == null {
  120. pn.r = v
  121. vn.p = p
  122. return
  123. }
  124. p = pn.r
  125. }
  126. }
  127. }
  128. // parent returns the parent node index of v and the pointer to v value
  129. // in the parent.
  130. func (t *binTree) parent(v uint32) (p uint32, ptr *uint32) {
  131. if t.root == v {
  132. return null, &t.root
  133. }
  134. p = t.node[v].p
  135. if t.node[p].l == v {
  136. ptr = &t.node[p].l
  137. } else {
  138. ptr = &t.node[p].r
  139. }
  140. return
  141. }
  142. // Remove node v.
  143. func (t *binTree) remove(v uint32) {
  144. vn := &t.node[v]
  145. p, ptr := t.parent(v)
  146. l, r := vn.l, vn.r
  147. if l == null {
  148. // Move the right child up.
  149. *ptr = r
  150. if r != null {
  151. t.node[r].p = p
  152. }
  153. return
  154. }
  155. if r == null {
  156. // Move the left child up.
  157. *ptr = l
  158. t.node[l].p = p
  159. return
  160. }
  161. // Search the in-order predecessor u.
  162. un := &t.node[l]
  163. ur := un.r
  164. if ur == null {
  165. // In order predecessor is l. Move it up.
  166. un.r = r
  167. t.node[r].p = l
  168. un.p = p
  169. *ptr = l
  170. return
  171. }
  172. var u uint32
  173. for {
  174. // Look for the max value in the tree where l is root.
  175. u = ur
  176. ur = t.node[u].r
  177. if ur == null {
  178. break
  179. }
  180. }
  181. // replace u with ul
  182. un = &t.node[u]
  183. ul := un.l
  184. up := un.p
  185. t.node[up].r = ul
  186. if ul != null {
  187. t.node[ul].p = up
  188. }
  189. // replace v by u
  190. un.l, un.r = l, r
  191. t.node[l].p = u
  192. t.node[r].p = u
  193. *ptr = u
  194. un.p = p
  195. }
  196. // search looks for the node that have the value x or for the nodes that
  197. // brace it. The node highest in the tree with the value x will be
  198. // returned. All other nodes with the same value live in left subtree of
  199. // the returned node.
  200. func (t *binTree) search(v uint32, x uint32) (a, b uint32) {
  201. a, b = null, null
  202. if v == null {
  203. return
  204. }
  205. for {
  206. vn := &t.node[v]
  207. if x <= vn.x {
  208. if x == vn.x {
  209. return v, v
  210. }
  211. b = v
  212. if vn.l == null {
  213. return
  214. }
  215. v = vn.l
  216. } else {
  217. a = v
  218. if vn.r == null {
  219. return
  220. }
  221. v = vn.r
  222. }
  223. }
  224. }
  225. // max returns the node with maximum value in the subtree with v as
  226. // root.
  227. func (t *binTree) max(v uint32) uint32 {
  228. if v == null {
  229. return null
  230. }
  231. for {
  232. r := t.node[v].r
  233. if r == null {
  234. return v
  235. }
  236. v = r
  237. }
  238. }
  239. // min returns the node with the minimum value in the subtree with v as
  240. // root.
  241. func (t *binTree) min(v uint32) uint32 {
  242. if v == null {
  243. return null
  244. }
  245. for {
  246. l := t.node[v].l
  247. if l == null {
  248. return v
  249. }
  250. v = l
  251. }
  252. }
  253. // pred returns the in-order predecessor of node v.
  254. func (t *binTree) pred(v uint32) uint32 {
  255. if v == null {
  256. return null
  257. }
  258. u := t.max(t.node[v].l)
  259. if u != null {
  260. return u
  261. }
  262. for {
  263. p := t.node[v].p
  264. if p == null {
  265. return null
  266. }
  267. if t.node[p].r == v {
  268. return p
  269. }
  270. v = p
  271. }
  272. }
  273. // succ returns the in-order successor of node v.
  274. func (t *binTree) succ(v uint32) uint32 {
  275. if v == null {
  276. return null
  277. }
  278. u := t.min(t.node[v].r)
  279. if u != null {
  280. return u
  281. }
  282. for {
  283. p := t.node[v].p
  284. if p == null {
  285. return null
  286. }
  287. if t.node[p].l == v {
  288. return p
  289. }
  290. v = p
  291. }
  292. }
  293. // xval converts the first four bytes of a into an 32-bit unsigned
  294. // integer in big-endian order.
  295. func xval(a []byte) uint32 {
  296. var x uint32
  297. switch len(a) {
  298. default:
  299. x |= uint32(a[3])
  300. fallthrough
  301. case 3:
  302. x |= uint32(a[2]) << 8
  303. fallthrough
  304. case 2:
  305. x |= uint32(a[1]) << 16
  306. fallthrough
  307. case 1:
  308. x |= uint32(a[0]) << 24
  309. case 0:
  310. }
  311. return x
  312. }
  313. // dumpX converts value x into a four-letter string.
  314. func dumpX(x uint32) string {
  315. a := make([]byte, 4)
  316. for i := 0; i < 4; i++ {
  317. c := byte(x >> uint((3-i)*8))
  318. if unicode.IsGraphic(rune(c)) {
  319. a[i] = c
  320. } else {
  321. a[i] = '.'
  322. }
  323. }
  324. return string(a)
  325. }
  326. /*
  327. // dumpNode writes a representation of the node v into the io.Writer.
  328. func (t *binTree) dumpNode(w io.Writer, v uint32, indent int) {
  329. if v == null {
  330. return
  331. }
  332. vn := &t.node[v]
  333. t.dumpNode(w, vn.r, indent+2)
  334. for i := 0; i < indent; i++ {
  335. fmt.Fprint(w, " ")
  336. }
  337. if vn.p == null {
  338. fmt.Fprintf(w, "node %d %q parent null\n", v, dumpX(vn.x))
  339. } else {
  340. fmt.Fprintf(w, "node %d %q parent %d\n", v, dumpX(vn.x), vn.p)
  341. }
  342. t.dumpNode(w, vn.l, indent+2)
  343. }
  344. // dump prints a representation of the binary tree into the writer.
  345. func (t *binTree) dump(w io.Writer) error {
  346. bw := bufio.NewWriter(w)
  347. t.dumpNode(bw, t.root, 0)
  348. return bw.Flush()
  349. }
  350. */
  351. func (t *binTree) distance(v uint32) int {
  352. dist := int(t.front) - int(v)
  353. if dist <= 0 {
  354. dist += len(t.node)
  355. }
  356. return dist
  357. }
  358. type matchParams struct {
  359. rep [4]uint32
  360. // length when match will be accepted
  361. nAccept int
  362. // nodes to check
  363. check int
  364. // finish if length get shorter
  365. stopShorter bool
  366. }
  367. func (t *binTree) match(m match, distIter func() (int, bool), p matchParams,
  368. ) (r match, checked int, accepted bool) {
  369. buf := &t.dict.buf
  370. for {
  371. if checked >= p.check {
  372. return m, checked, true
  373. }
  374. dist, ok := distIter()
  375. if !ok {
  376. return m, checked, false
  377. }
  378. checked++
  379. if m.n > 0 {
  380. i := buf.rear - dist + m.n - 1
  381. if i < 0 {
  382. i += len(buf.data)
  383. } else if i >= len(buf.data) {
  384. i -= len(buf.data)
  385. }
  386. if buf.data[i] != t.data[m.n-1] {
  387. if p.stopShorter {
  388. return m, checked, false
  389. }
  390. continue
  391. }
  392. }
  393. n := buf.matchLen(dist, t.data)
  394. switch n {
  395. case 0:
  396. if p.stopShorter {
  397. return m, checked, false
  398. }
  399. continue
  400. case 1:
  401. if uint32(dist-minDistance) != p.rep[0] {
  402. continue
  403. }
  404. }
  405. if n < m.n || (n == m.n && int64(dist) >= m.distance) {
  406. continue
  407. }
  408. m = match{int64(dist), n}
  409. if n >= p.nAccept {
  410. return m, checked, true
  411. }
  412. }
  413. }
  414. func (t *binTree) NextOp(rep [4]uint32) operation {
  415. // retrieve maxMatchLen data
  416. n, _ := t.dict.buf.Peek(t.data[:maxMatchLen])
  417. if n == 0 {
  418. panic("no data in buffer")
  419. }
  420. t.data = t.data[:n]
  421. var (
  422. m match
  423. x, u, v uint32
  424. iterPred, iterSucc func() (int, bool)
  425. )
  426. p := matchParams{
  427. rep: rep,
  428. nAccept: maxMatchLen,
  429. check: 32,
  430. }
  431. i := 4
  432. iterSmall := func() (dist int, ok bool) {
  433. i--
  434. if i <= 0 {
  435. return 0, false
  436. }
  437. return i, true
  438. }
  439. m, checked, accepted := t.match(m, iterSmall, p)
  440. if accepted {
  441. goto end
  442. }
  443. p.check -= checked
  444. x = xval(t.data)
  445. u, v = t.search(t.root, x)
  446. if u == v && len(t.data) == 4 {
  447. iter := func() (dist int, ok bool) {
  448. if u == null {
  449. return 0, false
  450. }
  451. dist = t.distance(u)
  452. u, v = t.search(t.node[u].l, x)
  453. if u != v {
  454. u = null
  455. }
  456. return dist, true
  457. }
  458. m, _, _ = t.match(m, iter, p)
  459. goto end
  460. }
  461. p.stopShorter = true
  462. iterSucc = func() (dist int, ok bool) {
  463. if v == null {
  464. return 0, false
  465. }
  466. dist = t.distance(v)
  467. v = t.succ(v)
  468. return dist, true
  469. }
  470. m, checked, accepted = t.match(m, iterSucc, p)
  471. if accepted {
  472. goto end
  473. }
  474. p.check -= checked
  475. iterPred = func() (dist int, ok bool) {
  476. if u == null {
  477. return 0, false
  478. }
  479. dist = t.distance(u)
  480. u = t.pred(u)
  481. return dist, true
  482. }
  483. m, _, _ = t.match(m, iterPred, p)
  484. end:
  485. if m.n == 0 {
  486. return lit{t.data[0]}
  487. }
  488. return m
  489. }