ddsketch.go 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716
  1. // Unless explicitly stated otherwise all files in this repository are licensed
  2. // under the Apache License 2.0.
  3. // This product includes software developed at Datadog (https://www.datadoghq.com/).
  4. // Copyright 2021 Datadog, Inc.
  5. package ddsketch
  6. import (
  7. "errors"
  8. "io"
  9. "math"
  10. enc "github.com/DataDog/sketches-go/ddsketch/encoding"
  11. "github.com/DataDog/sketches-go/ddsketch/mapping"
  12. "github.com/DataDog/sketches-go/ddsketch/pb/sketchpb"
  13. "github.com/DataDog/sketches-go/ddsketch/stat"
  14. "github.com/DataDog/sketches-go/ddsketch/store"
  15. )
  16. var (
  17. errEmptySketch error = errors.New("no such element exists")
  18. errUnknownFlag error = errors.New("unknown encoding flag")
  19. )
  20. // Unexported to prevent usage and avoid the cost of dynamic dispatch
  21. type quantileSketch interface {
  22. RelativeAccuracy() float64
  23. IsEmpty() bool
  24. GetCount() float64
  25. GetSum() float64
  26. GetMinValue() (float64, error)
  27. GetMaxValue() (float64, error)
  28. GetValueAtQuantile(quantile float64) (float64, error)
  29. GetValuesAtQuantiles(quantiles []float64) ([]float64, error)
  30. ForEach(f func(value, count float64) (stop bool))
  31. Add(value float64) error
  32. AddWithCount(value, count float64) error
  33. // MergeWith
  34. // ChangeMapping
  35. Reweight(factor float64) error
  36. Clear()
  37. // Copy
  38. Encode(b *[]byte, omitIndexMapping bool)
  39. DecodeAndMergeWith(b []byte) error
  40. }
  41. var _ quantileSketch = (*DDSketch)(nil)
  42. var _ quantileSketch = (*DDSketchWithExactSummaryStatistics)(nil)
  43. type DDSketch struct {
  44. mapping.IndexMapping
  45. positiveValueStore store.Store
  46. negativeValueStore store.Store
  47. zeroCount float64
  48. minIndexableAbsoluteValue float64
  49. maxIndexableValue float64
  50. }
  51. func NewDDSketchFromStoreProvider(indexMapping mapping.IndexMapping, storeProvider store.Provider) *DDSketch {
  52. return NewDDSketch(indexMapping, storeProvider(), storeProvider())
  53. }
  54. func NewDDSketch(indexMapping mapping.IndexMapping, positiveValueStore store.Store, negativeValueStore store.Store) *DDSketch {
  55. return &DDSketch{
  56. IndexMapping: indexMapping,
  57. positiveValueStore: positiveValueStore,
  58. negativeValueStore: negativeValueStore,
  59. minIndexableAbsoluteValue: indexMapping.MinIndexableValue(),
  60. maxIndexableValue: indexMapping.MaxIndexableValue(),
  61. }
  62. }
  63. func NewDefaultDDSketch(relativeAccuracy float64) (*DDSketch, error) {
  64. m, err := mapping.NewDefaultMapping(relativeAccuracy)
  65. if err != nil {
  66. return nil, err
  67. }
  68. return NewDDSketchFromStoreProvider(m, store.DefaultProvider), nil
  69. }
  70. // Constructs an instance of DDSketch that offers constant-time insertion and whose size grows indefinitely
  71. // to accommodate for the range of input values.
  72. func LogUnboundedDenseDDSketch(relativeAccuracy float64) (*DDSketch, error) {
  73. indexMapping, err := mapping.NewLogarithmicMapping(relativeAccuracy)
  74. if err != nil {
  75. return nil, err
  76. }
  77. return NewDDSketch(indexMapping, store.NewDenseStore(), store.NewDenseStore()), nil
  78. }
  79. // Constructs an instance of DDSketch that offers constant-time insertion and whose size grows until the
  80. // maximum number of bins is reached, at which point bins with lowest indices are collapsed, which causes the
  81. // relative accuracy guarantee to be lost on lowest quantiles if values are all positive, or the mid-range
  82. // quantiles for values closest to zero if values include negative numbers.
  83. func LogCollapsingLowestDenseDDSketch(relativeAccuracy float64, maxNumBins int) (*DDSketch, error) {
  84. indexMapping, err := mapping.NewLogarithmicMapping(relativeAccuracy)
  85. if err != nil {
  86. return nil, err
  87. }
  88. return NewDDSketch(indexMapping, store.NewCollapsingLowestDenseStore(maxNumBins), store.NewCollapsingLowestDenseStore(maxNumBins)), nil
  89. }
  90. // Constructs an instance of DDSketch that offers constant-time insertion and whose size grows until the
  91. // maximum number of bins is reached, at which point bins with highest indices are collapsed, which causes the
  92. // relative accuracy guarantee to be lost on highest quantiles if values are all positive, or the lowest and
  93. // highest quantiles if values include negative numbers.
  94. func LogCollapsingHighestDenseDDSketch(relativeAccuracy float64, maxNumBins int) (*DDSketch, error) {
  95. indexMapping, err := mapping.NewLogarithmicMapping(relativeAccuracy)
  96. if err != nil {
  97. return nil, err
  98. }
  99. return NewDDSketch(indexMapping, store.NewCollapsingHighestDenseStore(maxNumBins), store.NewCollapsingHighestDenseStore(maxNumBins)), nil
  100. }
  101. // Adds a value to the sketch.
  102. func (s *DDSketch) Add(value float64) error {
  103. return s.AddWithCount(value, float64(1))
  104. }
  105. // Adds a value to the sketch with a float64 count.
  106. func (s *DDSketch) AddWithCount(value, count float64) error {
  107. if value < -s.maxIndexableValue || value > s.maxIndexableValue {
  108. return errors.New("The input value is outside the range that is tracked by the sketch.")
  109. }
  110. if count < 0 {
  111. return errors.New("The count cannot be negative.")
  112. }
  113. if value > s.minIndexableAbsoluteValue {
  114. s.positiveValueStore.AddWithCount(s.Index(value), count)
  115. } else if value < -s.minIndexableAbsoluteValue {
  116. s.negativeValueStore.AddWithCount(s.Index(-value), count)
  117. } else {
  118. s.zeroCount += count
  119. }
  120. return nil
  121. }
  122. // Return a (deep) copy of this sketch.
  123. func (s *DDSketch) Copy() *DDSketch {
  124. return &DDSketch{
  125. IndexMapping: s.IndexMapping,
  126. positiveValueStore: s.positiveValueStore.Copy(),
  127. negativeValueStore: s.negativeValueStore.Copy(),
  128. zeroCount: s.zeroCount,
  129. minIndexableAbsoluteValue: s.minIndexableAbsoluteValue,
  130. maxIndexableValue: s.maxIndexableValue,
  131. }
  132. }
  133. // Clear empties the sketch while allowing reusing already allocated memory.
  134. func (s *DDSketch) Clear() {
  135. s.positiveValueStore.Clear()
  136. s.negativeValueStore.Clear()
  137. s.zeroCount = 0
  138. }
  139. // Return the value at the specified quantile. Return a non-nil error if the quantile is invalid
  140. // or if the sketch is empty.
  141. func (s *DDSketch) GetValueAtQuantile(quantile float64) (float64, error) {
  142. if quantile < 0 || quantile > 1 {
  143. return math.NaN(), errors.New("The quantile must be between 0 and 1.")
  144. }
  145. count := s.GetCount()
  146. if count == 0 {
  147. return math.NaN(), errEmptySketch
  148. }
  149. rank := quantile * (count - 1)
  150. negativeValueCount := s.negativeValueStore.TotalCount()
  151. if rank < negativeValueCount {
  152. return -s.Value(s.negativeValueStore.KeyAtRank(negativeValueCount - 1 - rank)), nil
  153. } else if rank < s.zeroCount+negativeValueCount {
  154. return 0, nil
  155. } else {
  156. return s.Value(s.positiveValueStore.KeyAtRank(rank - s.zeroCount - negativeValueCount)), nil
  157. }
  158. }
  159. // Return the values at the respective specified quantiles. Return a non-nil error if any of the quantiles
  160. // is invalid or if the sketch is empty.
  161. func (s *DDSketch) GetValuesAtQuantiles(quantiles []float64) ([]float64, error) {
  162. values := make([]float64, len(quantiles))
  163. for i, q := range quantiles {
  164. val, err := s.GetValueAtQuantile(q)
  165. if err != nil {
  166. return nil, err
  167. }
  168. values[i] = val
  169. }
  170. return values, nil
  171. }
  172. // Return the total number of values that have been added to this sketch.
  173. func (s *DDSketch) GetCount() float64 {
  174. return s.zeroCount + s.positiveValueStore.TotalCount() + s.negativeValueStore.TotalCount()
  175. }
  176. // Return true iff no value has been added to this sketch.
  177. func (s *DDSketch) IsEmpty() bool {
  178. return s.zeroCount == 0 && s.positiveValueStore.IsEmpty() && s.negativeValueStore.IsEmpty()
  179. }
  180. // Return the maximum value that has been added to this sketch. Return a non-nil error if the sketch
  181. // is empty.
  182. func (s *DDSketch) GetMaxValue() (float64, error) {
  183. if !s.positiveValueStore.IsEmpty() {
  184. maxIndex, _ := s.positiveValueStore.MaxIndex()
  185. return s.Value(maxIndex), nil
  186. } else if s.zeroCount > 0 {
  187. return 0, nil
  188. } else {
  189. minIndex, err := s.negativeValueStore.MinIndex()
  190. if err != nil {
  191. return math.NaN(), err
  192. }
  193. return -s.Value(minIndex), nil
  194. }
  195. }
  196. // Return the minimum value that has been added to this sketch. Returns a non-nil error if the sketch
  197. // is empty.
  198. func (s *DDSketch) GetMinValue() (float64, error) {
  199. if !s.negativeValueStore.IsEmpty() {
  200. maxIndex, _ := s.negativeValueStore.MaxIndex()
  201. return -s.Value(maxIndex), nil
  202. } else if s.zeroCount > 0 {
  203. return 0, nil
  204. } else {
  205. minIndex, err := s.positiveValueStore.MinIndex()
  206. if err != nil {
  207. return math.NaN(), err
  208. }
  209. return s.Value(minIndex), nil
  210. }
  211. }
  212. // GetSum returns an approximation of the sum of the values that have been added to the sketch. If the
  213. // values that have been added to the sketch all have the same sign, the approximation error has
  214. // the relative accuracy guarantees of the mapping used for this sketch.
  215. func (s *DDSketch) GetSum() (sum float64) {
  216. s.ForEach(func(value float64, count float64) (stop bool) {
  217. sum += value * count
  218. return false
  219. })
  220. return sum
  221. }
  222. // ForEach applies f on the bins of the sketches until f returns true.
  223. // There is no guarantee on the bin iteration order.
  224. func (s *DDSketch) ForEach(f func(value, count float64) (stop bool)) {
  225. if s.zeroCount != 0 && f(0, s.zeroCount) {
  226. return
  227. }
  228. stopped := false
  229. s.positiveValueStore.ForEach(func(index int, count float64) bool {
  230. stopped = f(s.IndexMapping.Value(index), count)
  231. return stopped
  232. })
  233. if stopped {
  234. return
  235. }
  236. s.negativeValueStore.ForEach(func(index int, count float64) bool {
  237. return f(-s.IndexMapping.Value(index), count)
  238. })
  239. }
  240. // Merges the other sketch into this one. After this operation, this sketch encodes the values that
  241. // were added to both this and the other sketches.
  242. func (s *DDSketch) MergeWith(other *DDSketch) error {
  243. if !s.IndexMapping.Equals(other.IndexMapping) {
  244. return errors.New("Cannot merge sketches with different index mappings.")
  245. }
  246. s.positiveValueStore.MergeWith(other.positiveValueStore)
  247. s.negativeValueStore.MergeWith(other.negativeValueStore)
  248. s.zeroCount += other.zeroCount
  249. return nil
  250. }
  251. // Generates a protobuf representation of this DDSketch.
  252. func (s *DDSketch) ToProto() *sketchpb.DDSketch {
  253. return &sketchpb.DDSketch{
  254. Mapping: s.IndexMapping.ToProto(),
  255. PositiveValues: s.positiveValueStore.ToProto(),
  256. NegativeValues: s.negativeValueStore.ToProto(),
  257. ZeroCount: s.zeroCount,
  258. }
  259. }
  260. // FromProto builds a new instance of DDSketch based on the provided protobuf representation, using a Dense store.
  261. func FromProto(pb *sketchpb.DDSketch) (*DDSketch, error) {
  262. return FromProtoWithStoreProvider(pb, store.DenseStoreConstructor)
  263. }
  264. func FromProtoWithStoreProvider(pb *sketchpb.DDSketch, storeProvider store.Provider) (*DDSketch, error) {
  265. positiveValueStore := storeProvider()
  266. store.MergeWithProto(positiveValueStore, pb.PositiveValues)
  267. negativeValueStore := storeProvider()
  268. store.MergeWithProto(negativeValueStore, pb.NegativeValues)
  269. m, err := mapping.FromProto(pb.Mapping)
  270. if err != nil {
  271. return nil, err
  272. }
  273. return &DDSketch{
  274. IndexMapping: m,
  275. positiveValueStore: positiveValueStore,
  276. negativeValueStore: negativeValueStore,
  277. zeroCount: pb.ZeroCount,
  278. minIndexableAbsoluteValue: m.MinIndexableValue(),
  279. maxIndexableValue: m.MaxIndexableValue(),
  280. }, nil
  281. }
  282. // Encode serializes the sketch and appends the serialized content to the provided []byte.
  283. // If the capacity of the provided []byte is large enough, Encode does not allocate memory space.
  284. // When the index mapping is known at the time of deserialization, omitIndexMapping can be set to true to avoid encoding it and to make the serialized content smaller.
  285. // The encoding format is described in the encoding/flag module.
  286. func (s *DDSketch) Encode(b *[]byte, omitIndexMapping bool) {
  287. if s.zeroCount != 0 {
  288. enc.EncodeFlag(b, enc.FlagZeroCountVarFloat)
  289. enc.EncodeVarfloat64(b, s.zeroCount)
  290. }
  291. if !omitIndexMapping {
  292. s.IndexMapping.Encode(b)
  293. }
  294. s.positiveValueStore.Encode(b, enc.FlagTypePositiveStore)
  295. s.negativeValueStore.Encode(b, enc.FlagTypeNegativeStore)
  296. }
  297. // DecodeDDSketch deserializes a sketch.
  298. // Stores are built using storeProvider. The store type needs not match the
  299. // store that the serialized sketch initially used. However, using the same
  300. // store type may make decoding faster. In the absence of high performance
  301. // requirements, store.BufferedPaginatedStoreConstructor is a sound enough
  302. // choice of store provider.
  303. // To avoid memory allocations, it is possible to use a store provider that
  304. // reuses stores, by calling Clear() on previously used stores before providing
  305. // the store.
  306. // If the serialized data does not contain the index mapping, you need to
  307. // specify the index mapping that was used in the sketch that was encoded.
  308. // Otherwise, you can use nil and the index mapping will be decoded from the
  309. // serialized data.
  310. // It is possible to decode with this function an encoded
  311. // DDSketchWithExactSummaryStatistics, but the exact summary statistics will be
  312. // lost.
  313. func DecodeDDSketch(b []byte, storeProvider store.Provider, indexMapping mapping.IndexMapping) (*DDSketch, error) {
  314. s := &DDSketch{
  315. IndexMapping: indexMapping,
  316. positiveValueStore: storeProvider(),
  317. negativeValueStore: storeProvider(),
  318. zeroCount: float64(0),
  319. }
  320. err := s.DecodeAndMergeWith(b)
  321. return s, err
  322. }
  323. // DecodeAndMergeWith deserializes a sketch and merges its content in the
  324. // receiver sketch.
  325. // If the serialized content contains an index mapping that differs from the one
  326. // of the receiver, DecodeAndMergeWith returns an error.
  327. func (s *DDSketch) DecodeAndMergeWith(bb []byte) error {
  328. return s.decodeAndMergeWith(bb, func(b *[]byte, flag enc.Flag) error {
  329. switch flag {
  330. case enc.FlagCount, enc.FlagSum, enc.FlagMin, enc.FlagMax:
  331. // Exact summary stats are ignored.
  332. if len(*b) < 8 {
  333. return io.EOF
  334. }
  335. *b = (*b)[8:]
  336. return nil
  337. default:
  338. return errUnknownFlag
  339. }
  340. })
  341. }
  342. func (s *DDSketch) decodeAndMergeWith(bb []byte, fallbackDecode func(b *[]byte, flag enc.Flag) error) error {
  343. b := &bb
  344. for len(*b) > 0 {
  345. flag, err := enc.DecodeFlag(b)
  346. if err != nil {
  347. return err
  348. }
  349. switch flag.Type() {
  350. case enc.FlagTypePositiveStore:
  351. s.positiveValueStore.DecodeAndMergeWith(b, flag.SubFlag())
  352. case enc.FlagTypeNegativeStore:
  353. s.negativeValueStore.DecodeAndMergeWith(b, flag.SubFlag())
  354. case enc.FlagTypeIndexMapping:
  355. decodedIndexMapping, err := mapping.Decode(b, flag)
  356. if err != nil {
  357. return err
  358. }
  359. if s.IndexMapping != nil && !s.IndexMapping.Equals(decodedIndexMapping) {
  360. return errors.New("index mapping mismatch")
  361. }
  362. s.IndexMapping = decodedIndexMapping
  363. default:
  364. switch flag {
  365. case enc.FlagZeroCountVarFloat:
  366. decodedZeroCount, err := enc.DecodeVarfloat64(b)
  367. if err != nil {
  368. return err
  369. }
  370. s.zeroCount += decodedZeroCount
  371. default:
  372. err := fallbackDecode(b, flag)
  373. if err != nil {
  374. return err
  375. }
  376. }
  377. }
  378. }
  379. if s.IndexMapping == nil {
  380. return errors.New("missing index mapping")
  381. }
  382. s.minIndexableAbsoluteValue = s.IndexMapping.MinIndexableValue()
  383. s.maxIndexableValue = s.IndexMapping.MaxIndexableValue()
  384. return nil
  385. }
  386. // ChangeMapping changes the store to a new mapping.
  387. // it doesn't change s but returns a newly created sketch.
  388. // positiveStore and negativeStore must be different stores, and be empty when the function is called.
  389. // It is not the conversion that minimizes the loss in relative
  390. // accuracy, but it avoids artefacts like empty bins that make the histograms look bad.
  391. // scaleFactor allows to scale out / in all values. (changing units for eg)
  392. func (s *DDSketch) ChangeMapping(newMapping mapping.IndexMapping, positiveStore store.Store, negativeStore store.Store, scaleFactor float64) *DDSketch {
  393. if scaleFactor == 1 && s.IndexMapping.Equals(newMapping) {
  394. return s.Copy()
  395. }
  396. changeStoreMapping(s.IndexMapping, newMapping, s.positiveValueStore, positiveStore, scaleFactor)
  397. changeStoreMapping(s.IndexMapping, newMapping, s.negativeValueStore, negativeStore, scaleFactor)
  398. newSketch := NewDDSketch(newMapping, positiveStore, negativeStore)
  399. newSketch.zeroCount = s.zeroCount
  400. return newSketch
  401. }
  402. func changeStoreMapping(oldMapping, newMapping mapping.IndexMapping, oldStore, newStore store.Store, scaleFactor float64) {
  403. oldStore.ForEach(func(index int, count float64) (stop bool) {
  404. inLowerBound := oldMapping.LowerBound(index) * scaleFactor
  405. inHigherBound := oldMapping.LowerBound(index+1) * scaleFactor
  406. inSize := inHigherBound - inLowerBound
  407. for outIndex := newMapping.Index(inLowerBound); newMapping.LowerBound(outIndex) < inHigherBound; outIndex++ {
  408. outLowerBound := newMapping.LowerBound(outIndex)
  409. outHigherBound := newMapping.LowerBound(outIndex + 1)
  410. lowerIntersectionBound := math.Max(outLowerBound, inLowerBound)
  411. higherIntersectionBound := math.Min(outHigherBound, inHigherBound)
  412. intersectionSize := higherIntersectionBound - lowerIntersectionBound
  413. proportion := intersectionSize / inSize
  414. newStore.AddWithCount(outIndex, proportion*count)
  415. }
  416. return false
  417. })
  418. }
  419. // Reweight multiplies all values from the sketch by w, but keeps the same global distribution.
  420. // w has to be strictly greater than 0.
  421. func (s *DDSketch) Reweight(w float64) error {
  422. if w <= 0 {
  423. return errors.New("can't reweight by a negative factor")
  424. }
  425. if w == 1 {
  426. return nil
  427. }
  428. s.zeroCount *= w
  429. if err := s.positiveValueStore.Reweight(w); err != nil {
  430. return err
  431. }
  432. if err := s.negativeValueStore.Reweight(w); err != nil {
  433. return err
  434. }
  435. return nil
  436. }
  437. // DDSketchWithExactSummaryStatistics returns exact count, sum, min and max, as
  438. // opposed to DDSketch, which may return approximate values for those
  439. // statistics. Because of the need to track them exactly, adding and merging
  440. // operations are slightly more exepensive than those of DDSketch.
  441. type DDSketchWithExactSummaryStatistics struct {
  442. sketch *DDSketch
  443. summaryStatistics *stat.SummaryStatistics
  444. }
  445. func NewDefaultDDSketchWithExactSummaryStatistics(relativeAccuracy float64) (*DDSketchWithExactSummaryStatistics, error) {
  446. sketch, err := NewDefaultDDSketch(relativeAccuracy)
  447. if err != nil {
  448. return nil, err
  449. }
  450. return &DDSketchWithExactSummaryStatistics{
  451. sketch: sketch,
  452. summaryStatistics: stat.NewSummaryStatistics(),
  453. }, nil
  454. }
  455. func NewDDSketchWithExactSummaryStatistics(mapping mapping.IndexMapping, storeProvider store.Provider) *DDSketchWithExactSummaryStatistics {
  456. return &DDSketchWithExactSummaryStatistics{
  457. sketch: NewDDSketchFromStoreProvider(mapping, storeProvider),
  458. summaryStatistics: stat.NewSummaryStatistics(),
  459. }
  460. }
  461. func (s *DDSketchWithExactSummaryStatistics) RelativeAccuracy() float64 {
  462. return s.sketch.RelativeAccuracy()
  463. }
  464. func (s *DDSketchWithExactSummaryStatistics) IsEmpty() bool {
  465. return s.summaryStatistics.Count() == 0
  466. }
  467. func (s *DDSketchWithExactSummaryStatistics) GetCount() float64 {
  468. return s.summaryStatistics.Count()
  469. }
  470. func (s *DDSketchWithExactSummaryStatistics) GetSum() float64 {
  471. return s.summaryStatistics.Sum()
  472. }
  473. func (s *DDSketchWithExactSummaryStatistics) GetMinValue() (float64, error) {
  474. if s.sketch.IsEmpty() {
  475. return math.NaN(), errEmptySketch
  476. }
  477. return s.summaryStatistics.Min(), nil
  478. }
  479. func (s *DDSketchWithExactSummaryStatistics) GetMaxValue() (float64, error) {
  480. if s.sketch.IsEmpty() {
  481. return math.NaN(), errEmptySketch
  482. }
  483. return s.summaryStatistics.Max(), nil
  484. }
  485. func (s *DDSketchWithExactSummaryStatistics) GetValueAtQuantile(quantile float64) (float64, error) {
  486. value, err := s.sketch.GetValueAtQuantile(quantile)
  487. min := s.summaryStatistics.Min()
  488. if value < min {
  489. return min, err
  490. }
  491. max := s.summaryStatistics.Max()
  492. if value > max {
  493. return max, err
  494. }
  495. return value, err
  496. }
  497. func (s *DDSketchWithExactSummaryStatistics) GetValuesAtQuantiles(quantiles []float64) ([]float64, error) {
  498. values, err := s.sketch.GetValuesAtQuantiles(quantiles)
  499. min := s.summaryStatistics.Min()
  500. max := s.summaryStatistics.Max()
  501. for i := range values {
  502. if values[i] < min {
  503. values[i] = min
  504. } else if values[i] > max {
  505. values[i] = max
  506. }
  507. }
  508. return values, err
  509. }
  510. func (s *DDSketchWithExactSummaryStatistics) ForEach(f func(value, count float64) (stop bool)) {
  511. s.sketch.ForEach(f)
  512. }
  513. func (s *DDSketchWithExactSummaryStatistics) Clear() {
  514. s.sketch.Clear()
  515. s.summaryStatistics.Clear()
  516. }
  517. func (s *DDSketchWithExactSummaryStatistics) Add(value float64) error {
  518. err := s.sketch.Add(value)
  519. if err != nil {
  520. return err
  521. }
  522. s.summaryStatistics.Add(value, 1)
  523. return nil
  524. }
  525. func (s *DDSketchWithExactSummaryStatistics) AddWithCount(value, count float64) error {
  526. if count == 0 {
  527. return nil
  528. }
  529. err := s.sketch.AddWithCount(value, count)
  530. if err != nil {
  531. return err
  532. }
  533. s.summaryStatistics.Add(value, count)
  534. return nil
  535. }
  536. func (s *DDSketchWithExactSummaryStatistics) MergeWith(o *DDSketchWithExactSummaryStatistics) error {
  537. err := s.sketch.MergeWith(o.sketch)
  538. if err != nil {
  539. return err
  540. }
  541. s.summaryStatistics.MergeWith(o.summaryStatistics)
  542. return nil
  543. }
  544. func (s *DDSketchWithExactSummaryStatistics) Copy() *DDSketchWithExactSummaryStatistics {
  545. return &DDSketchWithExactSummaryStatistics{
  546. sketch: s.sketch.Copy(),
  547. summaryStatistics: s.summaryStatistics.Copy(),
  548. }
  549. }
  550. func (s *DDSketchWithExactSummaryStatistics) Reweight(factor float64) error {
  551. err := s.sketch.Reweight(factor)
  552. if err != nil {
  553. return err
  554. }
  555. s.summaryStatistics.Reweight(factor)
  556. return nil
  557. }
  558. func (s *DDSketchWithExactSummaryStatistics) ChangeMapping(newMapping mapping.IndexMapping, storeProvider store.Provider, scaleFactor float64) *DDSketchWithExactSummaryStatistics {
  559. summaryStatisticsCopy := s.summaryStatistics.Copy()
  560. summaryStatisticsCopy.Rescale(scaleFactor)
  561. return &DDSketchWithExactSummaryStatistics{
  562. sketch: s.sketch.ChangeMapping(newMapping, storeProvider(), storeProvider(), scaleFactor),
  563. summaryStatistics: summaryStatisticsCopy,
  564. }
  565. }
  566. func (s *DDSketchWithExactSummaryStatistics) Encode(b *[]byte, omitIndexMapping bool) {
  567. if s.summaryStatistics.Count() != 0 {
  568. enc.EncodeFlag(b, enc.FlagCount)
  569. enc.EncodeVarfloat64(b, s.summaryStatistics.Count())
  570. }
  571. if s.summaryStatistics.Sum() != 0 {
  572. enc.EncodeFlag(b, enc.FlagSum)
  573. enc.EncodeFloat64LE(b, s.summaryStatistics.Sum())
  574. }
  575. if s.summaryStatistics.Min() != math.Inf(1) {
  576. enc.EncodeFlag(b, enc.FlagMin)
  577. enc.EncodeFloat64LE(b, s.summaryStatistics.Min())
  578. }
  579. if s.summaryStatistics.Max() != math.Inf(-1) {
  580. enc.EncodeFlag(b, enc.FlagMax)
  581. enc.EncodeFloat64LE(b, s.summaryStatistics.Max())
  582. }
  583. s.sketch.Encode(b, omitIndexMapping)
  584. }
  585. // DecodeDDSketchWithExactSummaryStatistics deserializes a sketch.
  586. // Stores are built using storeProvider. The store type needs not match the
  587. // store that the serialized sketch initially used. However, using the same
  588. // store type may make decoding faster. In the absence of high performance
  589. // requirements, store.DefaultProvider is a sound enough choice of store
  590. // provider.
  591. // To avoid memory allocations, it is possible to use a store provider that
  592. // reuses stores, by calling Clear() on previously used stores before providing
  593. // the store.
  594. // If the serialized data does not contain the index mapping, you need to
  595. // specify the index mapping that was used in the sketch that was encoded.
  596. // Otherwise, you can use nil and the index mapping will be decoded from the
  597. // serialized data.
  598. // It is not possible to decode with this function an encoded DDSketch (unless
  599. // it is empty), because it does not track exact summary statistics
  600. func DecodeDDSketchWithExactSummaryStatistics(b []byte, storeProvider store.Provider, indexMapping mapping.IndexMapping) (*DDSketchWithExactSummaryStatistics, error) {
  601. s := &DDSketchWithExactSummaryStatistics{
  602. sketch: &DDSketch{
  603. IndexMapping: indexMapping,
  604. positiveValueStore: storeProvider(),
  605. negativeValueStore: storeProvider(),
  606. zeroCount: float64(0),
  607. },
  608. summaryStatistics: stat.NewSummaryStatistics(),
  609. }
  610. err := s.DecodeAndMergeWith(b)
  611. return s, err
  612. }
  613. func (s *DDSketchWithExactSummaryStatistics) DecodeAndMergeWith(bb []byte) error {
  614. err := s.sketch.decodeAndMergeWith(bb, func(b *[]byte, flag enc.Flag) error {
  615. switch flag {
  616. case enc.FlagCount:
  617. count, err := enc.DecodeVarfloat64(b)
  618. if err != nil {
  619. return err
  620. }
  621. s.summaryStatistics.AddToCount(count)
  622. return nil
  623. case enc.FlagSum:
  624. sum, err := enc.DecodeFloat64LE(b)
  625. if err != nil {
  626. return err
  627. }
  628. s.summaryStatistics.AddToSum(sum)
  629. return nil
  630. case enc.FlagMin, enc.FlagMax:
  631. stat, err := enc.DecodeFloat64LE(b)
  632. if err != nil {
  633. return err
  634. }
  635. s.summaryStatistics.Add(stat, 0)
  636. return nil
  637. default:
  638. return errUnknownFlag
  639. }
  640. })
  641. if err != nil {
  642. return err
  643. }
  644. // It is assumed that if the count is encoded, other exact summary
  645. // statistics are encoded as well, which is the case if Encode is used.
  646. if s.summaryStatistics.Count() == 0 && !s.sketch.IsEmpty() {
  647. return errors.New("missing exact summary statistics")
  648. }
  649. return nil
  650. }