sketch.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. /*
  2. * Copyright 2019 Dgraph Labs, Inc. and Contributors
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. // This package includes multiple probabalistic data structures needed for
  17. // admission/eviction metadata. Most are Counting Bloom Filter variations, but
  18. // a caching-specific feature that is also required is a "freshness" mechanism,
  19. // which basically serves as a "lifetime" process. This freshness mechanism
  20. // was described in the original TinyLFU paper [1], but other mechanisms may
  21. // be better suited for certain data distributions.
  22. //
  23. // [1]: https://arxiv.org/abs/1512.00727
  24. package ristretto
  25. import (
  26. "fmt"
  27. "math/rand"
  28. "time"
  29. )
  30. // cmSketch is a Count-Min sketch implementation with 4-bit counters, heavily
  31. // based on Damian Gryski's CM4 [1].
  32. //
  33. // [1]: https://github.com/dgryski/go-tinylfu/blob/master/cm4.go
  34. type cmSketch struct {
  35. rows [cmDepth]cmRow
  36. seed [cmDepth]uint64
  37. mask uint64
  38. }
  39. const (
  40. // cmDepth is the number of counter copies to store (think of it as rows).
  41. cmDepth = 4
  42. )
  43. func newCmSketch(numCounters int64) *cmSketch {
  44. if numCounters == 0 {
  45. panic("cmSketch: bad numCounters")
  46. }
  47. // Get the next power of 2 for better cache performance.
  48. numCounters = next2Power(numCounters)
  49. sketch := &cmSketch{mask: uint64(numCounters - 1)}
  50. // Initialize rows of counters and seeds.
  51. source := rand.New(rand.NewSource(time.Now().UnixNano()))
  52. for i := 0; i < cmDepth; i++ {
  53. sketch.seed[i] = source.Uint64()
  54. sketch.rows[i] = newCmRow(numCounters)
  55. }
  56. return sketch
  57. }
  58. // Increment increments the count(ers) for the specified key.
  59. func (s *cmSketch) Increment(hashed uint64) {
  60. for i := range s.rows {
  61. s.rows[i].increment((hashed ^ s.seed[i]) & s.mask)
  62. }
  63. }
  64. // Estimate returns the value of the specified key.
  65. func (s *cmSketch) Estimate(hashed uint64) int64 {
  66. min := byte(255)
  67. for i := range s.rows {
  68. val := s.rows[i].get((hashed ^ s.seed[i]) & s.mask)
  69. if val < min {
  70. min = val
  71. }
  72. }
  73. return int64(min)
  74. }
  75. // Reset halves all counter values.
  76. func (s *cmSketch) Reset() {
  77. for _, r := range s.rows {
  78. r.reset()
  79. }
  80. }
  81. // Clear zeroes all counters.
  82. func (s *cmSketch) Clear() {
  83. for _, r := range s.rows {
  84. r.clear()
  85. }
  86. }
  87. // cmRow is a row of bytes, with each byte holding two counters.
  88. type cmRow []byte
  89. func newCmRow(numCounters int64) cmRow {
  90. return make(cmRow, numCounters/2)
  91. }
  92. func (r cmRow) get(n uint64) byte {
  93. return byte(r[n/2]>>((n&1)*4)) & 0x0f
  94. }
  95. func (r cmRow) increment(n uint64) {
  96. // Index of the counter.
  97. i := n / 2
  98. // Shift distance (even 0, odd 4).
  99. s := (n & 1) * 4
  100. // Counter value.
  101. v := (r[i] >> s) & 0x0f
  102. // Only increment if not max value (overflow wrap is bad for LFU).
  103. if v < 15 {
  104. r[i] += 1 << s
  105. }
  106. }
  107. func (r cmRow) reset() {
  108. // Halve each counter.
  109. for i := range r {
  110. r[i] = (r[i] >> 1) & 0x77
  111. }
  112. }
  113. func (r cmRow) clear() {
  114. // Zero each counter.
  115. for i := range r {
  116. r[i] = 0
  117. }
  118. }
  119. func (r cmRow) string() string {
  120. s := ""
  121. for i := uint64(0); i < uint64(len(r)*2); i++ {
  122. s += fmt.Sprintf("%02d ", (r[(i/2)]>>((i&1)*4))&0x0f)
  123. }
  124. s = s[:len(s)-1]
  125. return s
  126. }
  127. // next2Power rounds x up to the next power of 2, if it's not already one.
  128. func next2Power(x int64) int64 {
  129. x--
  130. x |= x >> 1
  131. x |= x >> 2
  132. x |= x >> 4
  133. x |= x >> 8
  134. x |= x >> 16
  135. x |= x >> 32
  136. x++
  137. return x
  138. }