| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715 |
- // Package roaring is an implementation of Roaring Bitmaps in Go.
- // They provide fast compressed bitmap data structures (also called bitset).
- // They are ideally suited to represent sets of integers over
- // relatively small ranges.
- // See http://roaringbitmap.org for details.
- package roaring
- import (
- "bytes"
- "encoding/base64"
- "fmt"
- "io"
- "strconv"
- "github.com/RoaringBitmap/roaring/internal"
- )
- // Bitmap represents a compressed bitmap where you can add integers.
- type Bitmap struct {
- highlowcontainer roaringArray
- }
- // ToBase64 serializes a bitmap as Base64
- func (rb *Bitmap) ToBase64() (string, error) {
- buf := new(bytes.Buffer)
- _, err := rb.WriteTo(buf)
- return base64.StdEncoding.EncodeToString(buf.Bytes()), err
- }
- // FromBase64 deserializes a bitmap from Base64
- func (rb *Bitmap) FromBase64(str string) (int64, error) {
- data, err := base64.StdEncoding.DecodeString(str)
- if err != nil {
- return 0, err
- }
- buf := bytes.NewBuffer(data)
- return rb.ReadFrom(buf)
- }
- // WriteTo writes a serialized version of this bitmap to stream.
- // The format is compatible with other RoaringBitmap
- // implementations (Java, C) and is documented here:
- // https://github.com/RoaringBitmap/RoaringFormatSpec
- func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) {
- return rb.highlowcontainer.writeTo(stream)
- }
- // ToBytes returns an array of bytes corresponding to what is written
- // when calling WriteTo
- func (rb *Bitmap) ToBytes() ([]byte, error) {
- return rb.highlowcontainer.toBytes()
- }
- // Checksum computes a hash (currently FNV-1a) for a bitmap that is suitable for
- // using bitmaps as elements in hash sets or as keys in hash maps, as well as
- // generally quicker comparisons.
- // The implementation is biased towards efficiency in little endian machines, so
- // expect some extra CPU cycles and memory to be used if your machine is big endian.
- // Likewise, don't use this to verify integrity unless you're certain you'll load
- // the bitmap on a machine with the same endianess used to create it.
- func (rb *Bitmap) Checksum() uint64 {
- const (
- offset = 14695981039346656037
- prime = 1099511628211
- )
- var bytes []byte
- hash := uint64(offset)
- bytes = uint16SliceAsByteSlice(rb.highlowcontainer.keys)
- for _, b := range bytes {
- hash ^= uint64(b)
- hash *= prime
- }
- for _, c := range rb.highlowcontainer.containers {
- // 0 separator
- hash ^= 0
- hash *= prime
- switch c := c.(type) {
- case *bitmapContainer:
- bytes = uint64SliceAsByteSlice(c.bitmap)
- case *arrayContainer:
- bytes = uint16SliceAsByteSlice(c.content)
- case *runContainer16:
- bytes = interval16SliceAsByteSlice(c.iv)
- default:
- panic("invalid container type")
- }
- if len(bytes) == 0 {
- panic("empty containers are not supported")
- }
- for _, b := range bytes {
- hash ^= uint64(b)
- hash *= prime
- }
- }
- return hash
- }
- // ReadFrom reads a serialized version of this bitmap from stream.
- // The format is compatible with other RoaringBitmap
- // implementations (Java, C) and is documented here:
- // https://github.com/RoaringBitmap/RoaringFormatSpec
- // Since io.Reader is regarded as a stream and cannot be read twice.
- // So add cookieHeader to accept the 4-byte data that has been read in roaring64.ReadFrom.
- // It is not necessary to pass cookieHeader when call roaring.ReadFrom to read the roaring32 data directly.
- func (rb *Bitmap) ReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err error) {
- stream := internal.ByteInputAdapterPool.Get().(*internal.ByteInputAdapter)
- stream.Reset(reader)
- p, err = rb.highlowcontainer.readFrom(stream, cookieHeader...)
- internal.ByteInputAdapterPool.Put(stream)
- return
- }
- // FromBuffer creates a bitmap from its serialized version stored in buffer
- //
- // The format specification is available here:
- // https://github.com/RoaringBitmap/RoaringFormatSpec
- //
- // The provided byte array (buf) is expected to be a constant.
- // The function makes the best effort attempt not to copy data.
- // You should take care not to modify buff as it will
- // likely result in unexpected program behavior.
- //
- // Resulting bitmaps are effectively immutable in the following sense:
- // a copy-on-write marker is used so that when you modify the resulting
- // bitmap, copies of selected data (containers) are made.
- // You should *not* change the copy-on-write status of the resulting
- // bitmaps (SetCopyOnWrite).
- //
- // If buf becomes unavailable, then a bitmap created with
- // FromBuffer would be effectively broken. Furthermore, any
- // bitmap derived from this bitmap (e.g., via Or, And) might
- // also be broken. Thus, before making buf unavailable, you should
- // call CloneCopyOnWriteContainers on all such bitmaps.
- //
- func (rb *Bitmap) FromBuffer(buf []byte) (p int64, err error) {
- stream := internal.ByteBufferPool.Get().(*internal.ByteBuffer)
- stream.Reset(buf)
- p, err = rb.highlowcontainer.readFrom(stream)
- internal.ByteBufferPool.Put(stream)
- return
- }
- // RunOptimize attempts to further compress the runs of consecutive values found in the bitmap
- func (rb *Bitmap) RunOptimize() {
- rb.highlowcontainer.runOptimize()
- }
- // HasRunCompression returns true if the bitmap benefits from run compression
- func (rb *Bitmap) HasRunCompression() bool {
- return rb.highlowcontainer.hasRunCompression()
- }
- // MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap
- // (same as ToBytes)
- func (rb *Bitmap) MarshalBinary() ([]byte, error) {
- return rb.ToBytes()
- }
- // UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap
- func (rb *Bitmap) UnmarshalBinary(data []byte) error {
- r := bytes.NewReader(data)
- _, err := rb.ReadFrom(r)
- return err
- }
- // NewBitmap creates a new empty Bitmap (see also New)
- func NewBitmap() *Bitmap {
- return &Bitmap{}
- }
- // New creates a new empty Bitmap (same as NewBitmap)
- func New() *Bitmap {
- return &Bitmap{}
- }
- // Clear resets the Bitmap to be logically empty, but may retain
- // some memory allocations that may speed up future operations
- func (rb *Bitmap) Clear() {
- rb.highlowcontainer.clear()
- }
- // ToArray creates a new slice containing all of the integers stored in the Bitmap in sorted order
- func (rb *Bitmap) ToArray() []uint32 {
- array := make([]uint32, rb.GetCardinality())
- pos := 0
- pos2 := 0
- for pos < rb.highlowcontainer.size() {
- hs := uint32(rb.highlowcontainer.getKeyAtIndex(pos)) << 16
- c := rb.highlowcontainer.getContainerAtIndex(pos)
- pos++
- pos2 = c.fillLeastSignificant16bits(array, pos2, hs)
- }
- return array
- }
- // GetSizeInBytes estimates the memory usage of the Bitmap. Note that this
- // might differ slightly from the amount of bytes required for persistent storage
- func (rb *Bitmap) GetSizeInBytes() uint64 {
- size := uint64(8)
- for _, c := range rb.highlowcontainer.containers {
- size += uint64(2) + uint64(c.getSizeInBytes())
- }
- return size
- }
- // GetSerializedSizeInBytes computes the serialized size in bytes
- // of the Bitmap. It should correspond to the
- // number of bytes written when invoking WriteTo. You can expect
- // that this function is much cheaper computationally than WriteTo.
- func (rb *Bitmap) GetSerializedSizeInBytes() uint64 {
- return rb.highlowcontainer.serializedSizeInBytes()
- }
- // BoundSerializedSizeInBytes returns an upper bound on the serialized size in bytes
- // assuming that one wants to store "cardinality" integers in [0, universe_size)
- func BoundSerializedSizeInBytes(cardinality uint64, universeSize uint64) uint64 {
- contnbr := (universeSize + uint64(65535)) / uint64(65536)
- if contnbr > cardinality {
- contnbr = cardinality
- // we can't have more containers than we have values
- }
- headermax := 8*contnbr + 4
- if 4 > (contnbr+7)/8 {
- headermax += 4
- } else {
- headermax += (contnbr + 7) / 8
- }
- valsarray := uint64(arrayContainerSizeInBytes(int(cardinality)))
- valsbitmap := contnbr * uint64(bitmapContainerSizeInBytes())
- valsbest := valsarray
- if valsbest > valsbitmap {
- valsbest = valsbitmap
- }
- return valsbest + headermax
- }
- // IntIterable allows you to iterate over the values in a Bitmap
- type IntIterable interface {
- HasNext() bool
- Next() uint32
- }
- // IntPeekable allows you to look at the next value without advancing and
- // advance as long as the next value is smaller than minval
- type IntPeekable interface {
- IntIterable
- // PeekNext peeks the next value without advancing the iterator
- PeekNext() uint32
- // AdvanceIfNeeded advances as long as the next value is smaller than minval
- AdvanceIfNeeded(minval uint32)
- }
- type intIterator struct {
- pos int
- hs uint32
- iter shortPeekable
- highlowcontainer *roaringArray
- // These embedded iterators per container type help reduce load in the GC.
- // This way, instead of making up-to 64k allocations per full iteration
- // we get a single allocation and simply reinitialize the appropriate
- // iterator and point to it in the generic `iter` member on each key bound.
- shortIter shortIterator
- runIter runIterator16
- bitmapIter bitmapContainerShortIterator
- }
- // HasNext returns true if there are more integers to iterate over
- func (ii *intIterator) HasNext() bool {
- return ii.pos < ii.highlowcontainer.size()
- }
- func (ii *intIterator) init() {
- if ii.highlowcontainer.size() > ii.pos {
- ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16
- c := ii.highlowcontainer.getContainerAtIndex(ii.pos)
- switch t := c.(type) {
- case *arrayContainer:
- ii.shortIter = shortIterator{t.content, 0}
- ii.iter = &ii.shortIter
- case *runContainer16:
- ii.runIter = runIterator16{rc: t, curIndex: 0, curPosInIndex: 0}
- ii.iter = &ii.runIter
- case *bitmapContainer:
- ii.bitmapIter = bitmapContainerShortIterator{t, t.NextSetBit(0)}
- ii.iter = &ii.bitmapIter
- }
- }
- }
- // Next returns the next integer
- func (ii *intIterator) Next() uint32 {
- x := uint32(ii.iter.next()) | ii.hs
- if !ii.iter.hasNext() {
- ii.pos = ii.pos + 1
- ii.init()
- }
- return x
- }
- // PeekNext peeks the next value without advancing the iterator
- func (ii *intIterator) PeekNext() uint32 {
- return uint32(ii.iter.peekNext()&maxLowBit) | ii.hs
- }
- // AdvanceIfNeeded advances as long as the next value is smaller than minval
- func (ii *intIterator) AdvanceIfNeeded(minval uint32) {
- to := minval & 0xffff0000
- for ii.HasNext() && ii.hs < to {
- ii.pos++
- ii.init()
- }
- if ii.HasNext() && ii.hs == to {
- ii.iter.advanceIfNeeded(lowbits(minval))
- if !ii.iter.hasNext() {
- ii.pos++
- ii.init()
- }
- }
- }
- // IntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap)
- type IntIterator = intIterator
- // Initialize configures the existing iterator so that it can iterate through the values of
- // the provided bitmap.
- // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove).
- func (p *intIterator) Initialize(a *Bitmap) {
- p.pos = 0
- p.highlowcontainer = &a.highlowcontainer
- p.init()
- }
- type intReverseIterator struct {
- pos int
- hs uint32
- iter shortIterable
- highlowcontainer *roaringArray
- shortIter reverseIterator
- runIter runReverseIterator16
- bitmapIter reverseBitmapContainerShortIterator
- }
- // HasNext returns true if there are more integers to iterate over
- func (ii *intReverseIterator) HasNext() bool {
- return ii.pos >= 0
- }
- func (ii *intReverseIterator) init() {
- if ii.pos >= 0 {
- ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16
- c := ii.highlowcontainer.getContainerAtIndex(ii.pos)
- switch t := c.(type) {
- case *arrayContainer:
- ii.shortIter = reverseIterator{t.content, len(t.content) - 1}
- ii.iter = &ii.shortIter
- case *runContainer16:
- index := int(len(t.iv)) - 1
- pos := uint16(0)
- if index >= 0 {
- pos = t.iv[index].length
- }
- ii.runIter = runReverseIterator16{rc: t, curIndex: index, curPosInIndex: pos}
- ii.iter = &ii.runIter
- case *bitmapContainer:
- pos := -1
- if t.cardinality > 0 {
- pos = int(t.maximum())
- }
- ii.bitmapIter = reverseBitmapContainerShortIterator{t, pos}
- ii.iter = &ii.bitmapIter
- }
- } else {
- ii.iter = nil
- }
- }
- // Next returns the next integer
- func (ii *intReverseIterator) Next() uint32 {
- x := uint32(ii.iter.next()) | ii.hs
- if !ii.iter.hasNext() {
- ii.pos = ii.pos - 1
- ii.init()
- }
- return x
- }
- // IntReverseIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap)
- type IntReverseIterator = intReverseIterator
- // Initialize configures the existing iterator so that it can iterate through the values of
- // the provided bitmap.
- // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove).
- func (p *intReverseIterator) Initialize(a *Bitmap) {
- p.highlowcontainer = &a.highlowcontainer
- p.pos = a.highlowcontainer.size() - 1
- p.init()
- }
- // ManyIntIterable allows you to iterate over the values in a Bitmap
- type ManyIntIterable interface {
- // NextMany fills buf up with values, returns how many values were returned
- NextMany(buf []uint32) int
- // NextMany64 fills up buf with 64 bit values, uses hs as a mask (OR), returns how many values were returned
- NextMany64(hs uint64, buf []uint64) int
- }
- type manyIntIterator struct {
- pos int
- hs uint32
- iter manyIterable
- highlowcontainer *roaringArray
- shortIter shortIterator
- runIter runIterator16
- bitmapIter bitmapContainerManyIterator
- }
- func (ii *manyIntIterator) init() {
- if ii.highlowcontainer.size() > ii.pos {
- ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16
- c := ii.highlowcontainer.getContainerAtIndex(ii.pos)
- switch t := c.(type) {
- case *arrayContainer:
- ii.shortIter = shortIterator{t.content, 0}
- ii.iter = &ii.shortIter
- case *runContainer16:
- ii.runIter = runIterator16{rc: t, curIndex: 0, curPosInIndex: 0}
- ii.iter = &ii.runIter
- case *bitmapContainer:
- ii.bitmapIter = bitmapContainerManyIterator{t, -1, 0}
- ii.iter = &ii.bitmapIter
- }
- } else {
- ii.iter = nil
- }
- }
- func (ii *manyIntIterator) NextMany(buf []uint32) int {
- n := 0
- for n < len(buf) {
- if ii.iter == nil {
- break
- }
- moreN := ii.iter.nextMany(ii.hs, buf[n:])
- n += moreN
- if moreN == 0 {
- ii.pos = ii.pos + 1
- ii.init()
- }
- }
- return n
- }
- func (ii *manyIntIterator) NextMany64(hs64 uint64, buf []uint64) int {
- n := 0
- for n < len(buf) {
- if ii.iter == nil {
- break
- }
- hs := uint64(ii.hs) | hs64
- moreN := ii.iter.nextMany64(hs, buf[n:])
- n += moreN
- if moreN == 0 {
- ii.pos = ii.pos + 1
- ii.init()
- }
- }
- return n
- }
- // ManyIntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap)
- type ManyIntIterator = manyIntIterator
- // Initialize configures the existing iterator so that it can iterate through the values of
- // the provided bitmap.
- // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove).
- func (p *manyIntIterator) Initialize(a *Bitmap) {
- p.pos = 0
- p.highlowcontainer = &a.highlowcontainer
- p.init()
- }
- // String creates a string representation of the Bitmap
- func (rb *Bitmap) String() string {
- // inspired by https://github.com/fzandona/goroar/
- var buffer bytes.Buffer
- start := []byte("{")
- buffer.Write(start)
- i := rb.Iterator()
- counter := 0
- if i.HasNext() {
- counter = counter + 1
- buffer.WriteString(strconv.FormatInt(int64(i.Next()), 10))
- }
- for i.HasNext() {
- buffer.WriteString(",")
- counter = counter + 1
- // to avoid exhausting the memory
- if counter > 0x40000 {
- buffer.WriteString("...")
- break
- }
- buffer.WriteString(strconv.FormatInt(int64(i.Next()), 10))
- }
- buffer.WriteString("}")
- return buffer.String()
- }
- // Iterate iterates over the bitmap, calling the given callback with each value in the bitmap. If the callback returns
- // false, the iteration is halted.
- // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove).
- // There is no guarantee as to what order the values will be iterated.
- func (rb *Bitmap) Iterate(cb func(x uint32) bool) {
- for i := 0; i < rb.highlowcontainer.size(); i++ {
- hs := uint32(rb.highlowcontainer.getKeyAtIndex(i)) << 16
- c := rb.highlowcontainer.getContainerAtIndex(i)
- var shouldContinue bool
- // This is hacky but it avoids allocations from invoking an interface method with a closure
- switch t := c.(type) {
- case *arrayContainer:
- shouldContinue = t.iterate(func(x uint16) bool {
- return cb(uint32(x) | hs)
- })
- case *runContainer16:
- shouldContinue = t.iterate(func(x uint16) bool {
- return cb(uint32(x) | hs)
- })
- case *bitmapContainer:
- shouldContinue = t.iterate(func(x uint16) bool {
- return cb(uint32(x) | hs)
- })
- }
- if !shouldContinue {
- break
- }
- }
- }
- // Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order;
- // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
- func (rb *Bitmap) Iterator() IntPeekable {
- p := new(intIterator)
- p.Initialize(rb)
- return p
- }
- // ReverseIterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order;
- // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
- func (rb *Bitmap) ReverseIterator() IntIterable {
- p := new(intReverseIterator)
- p.Initialize(rb)
- return p
- }
- // ManyIterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order;
- // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
- func (rb *Bitmap) ManyIterator() ManyIntIterable {
- p := new(manyIntIterator)
- p.Initialize(rb)
- return p
- }
- // Clone creates a copy of the Bitmap
- func (rb *Bitmap) Clone() *Bitmap {
- ptr := new(Bitmap)
- ptr.highlowcontainer = *rb.highlowcontainer.clone()
- return ptr
- }
- // Minimum get the smallest value stored in this roaring bitmap, assumes that it is not empty
- func (rb *Bitmap) Minimum() uint32 {
- if len(rb.highlowcontainer.containers) == 0 {
- panic("Empty bitmap")
- }
- return uint32(rb.highlowcontainer.containers[0].minimum()) | (uint32(rb.highlowcontainer.keys[0]) << 16)
- }
- // Maximum get the largest value stored in this roaring bitmap, assumes that it is not empty
- func (rb *Bitmap) Maximum() uint32 {
- if len(rb.highlowcontainer.containers) == 0 {
- panic("Empty bitmap")
- }
- lastindex := len(rb.highlowcontainer.containers) - 1
- return uint32(rb.highlowcontainer.containers[lastindex].maximum()) | (uint32(rb.highlowcontainer.keys[lastindex]) << 16)
- }
- // Contains returns true if the integer is contained in the bitmap
- func (rb *Bitmap) Contains(x uint32) bool {
- hb := highbits(x)
- c := rb.highlowcontainer.getContainer(hb)
- return c != nil && c.contains(lowbits(x))
- }
- // ContainsInt returns true if the integer is contained in the bitmap (this is a convenience method, the parameter is casted to uint32 and Contains is called)
- func (rb *Bitmap) ContainsInt(x int) bool {
- return rb.Contains(uint32(x))
- }
- // Equals returns true if the two bitmaps contain the same integers
- func (rb *Bitmap) Equals(o interface{}) bool {
- srb, ok := o.(*Bitmap)
- if ok {
- return srb.highlowcontainer.equals(rb.highlowcontainer)
- }
- return false
- }
- // AddOffset adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process
- func AddOffset(x *Bitmap, offset uint32) (answer *Bitmap) {
- return AddOffset64(x, int64(offset))
- }
- // AddOffset64 adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process
- // If offset + element is outside of the range [0,2^32), that the element will be dropped
- func AddOffset64(x *Bitmap, offset int64) (answer *Bitmap) {
- // we need "offset" to be a long because we want to support values
- // between -0xFFFFFFFF up to +-0xFFFFFFFF
- var containerOffset64 int64
- if offset < 0 {
- containerOffset64 = (offset - (1 << 16) + 1) / (1 << 16)
- } else {
- containerOffset64 = offset >> 16
- }
- answer = New()
- if containerOffset64 >= (1<<16) || containerOffset64 < -(1<<16) {
- return answer
- }
- containerOffset := int32(containerOffset64)
- inOffset := (uint16)(offset - containerOffset64*(1<<16))
- if inOffset == 0 {
- for pos := 0; pos < x.highlowcontainer.size(); pos++ {
- key := int32(x.highlowcontainer.getKeyAtIndex(pos))
- key += containerOffset
- if key >= 0 && key <= MaxUint16 {
- c := x.highlowcontainer.getContainerAtIndex(pos).clone()
- answer.highlowcontainer.appendContainer(uint16(key), c, false)
- }
- }
- } else {
- for pos := 0; pos < x.highlowcontainer.size(); pos++ {
- key := int32(x.highlowcontainer.getKeyAtIndex(pos))
- key += containerOffset
- if key+1 < 0 || key > MaxUint16 {
- continue
- }
- c := x.highlowcontainer.getContainerAtIndex(pos)
- lo, hi := c.addOffset(inOffset)
- if lo != nil && key >= 0 {
- curSize := answer.highlowcontainer.size()
- lastkey := int32(0)
- if curSize > 0 {
- lastkey = int32(answer.highlowcontainer.getKeyAtIndex(curSize - 1))
- }
- if curSize > 0 && lastkey == key {
- prev := answer.highlowcontainer.getContainerAtIndex(curSize - 1)
- orresult := prev.ior(lo)
- answer.highlowcontainer.setContainerAtIndex(curSize-1, orresult)
- } else {
- answer.highlowcontainer.appendContainer(uint16(key), lo, false)
- }
- }
- if hi != nil && key+1 <= MaxUint16 {
- answer.highlowcontainer.appendContainer(uint16(key+1), hi, false)
- }
- }
- }
- return answer
- }
- // Add the integer x to the bitmap
- func (rb *Bitmap) Add(x uint32) {
- hb := highbits(x)
- ra := &rb.highlowcontainer
- i := ra.getIndex(hb)
- if i >= 0 {
- var c container
- c = ra.getWritableContainerAtIndex(i).iaddReturnMinimized(lowbits(x))
- rb.highlowcontainer.setContainerAtIndex(i, c)
- } else {
- newac := newArrayContainer()
- rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x)))
- }
- }
- // add the integer x to the bitmap, return the container and its index
- func (rb *Bitmap) addwithptr(x uint32) (int, container) {
- hb := highbits(x)
- ra := &rb.highlowcontainer
- i := ra.getIndex(hb)
- var c container
- if i >= 0 {
- c = ra.getWritableContainerAtIndex(i).iaddReturnMinimized(lowbits(x))
- rb.highlowcontainer.setContainerAtIndex(i, c)
- return i, c
- }
- newac := newArrayContainer()
- c = newac.iaddReturnMinimized(lowbits(x))
- rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, c)
- return -i - 1, c
- }
- // CheckedAdd adds the integer x to the bitmap and return true if it was added (false if the integer was already present)
- func (rb *Bitmap) CheckedAdd(x uint32) bool {
- // TODO: add unit tests for this method
- hb := highbits(x)
- i := rb.highlowcontainer.getIndex(hb)
- if i >= 0 {
- C := rb.highlowcontainer.getWritableContainerAtIndex(i)
- oldcard := C.getCardinality()
- C = C.iaddReturnMinimized(lowbits(x))
- rb.highlowcontainer.setContainerAtIndex(i, C)
- return C.getCardinality() > oldcard
- }
- newac := newArrayContainer()
- rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x)))
- return true
- }
- // AddInt adds the integer x to the bitmap (convenience method: the parameter is casted to uint32 and we call Add)
- func (rb *Bitmap) AddInt(x int) {
- rb.Add(uint32(x))
- }
- // Remove the integer x from the bitmap
- func (rb *Bitmap) Remove(x uint32) {
- hb := highbits(x)
- i := rb.highlowcontainer.getIndex(hb)
- if i >= 0 {
- c := rb.highlowcontainer.getWritableContainerAtIndex(i).iremoveReturnMinimized(lowbits(x))
- rb.highlowcontainer.setContainerAtIndex(i, c)
- if rb.highlowcontainer.getContainerAtIndex(i).isEmpty() {
- rb.highlowcontainer.removeAtIndex(i)
- }
- }
- }
- // CheckedRemove removes the integer x from the bitmap and return true if the integer was effectively removed (and false if the integer was not present)
- func (rb *Bitmap) CheckedRemove(x uint32) bool {
- // TODO: add unit tests for this method
- hb := highbits(x)
- i := rb.highlowcontainer.getIndex(hb)
- if i >= 0 {
- C := rb.highlowcontainer.getWritableContainerAtIndex(i)
- oldcard := C.getCardinality()
- C = C.iremoveReturnMinimized(lowbits(x))
- rb.highlowcontainer.setContainerAtIndex(i, C)
- if rb.highlowcontainer.getContainerAtIndex(i).isEmpty() {
- rb.highlowcontainer.removeAtIndex(i)
- return true
- }
- return C.getCardinality() < oldcard
- }
- return false
- }
- // IsEmpty returns true if the Bitmap is empty (it is faster than doing (GetCardinality() == 0))
- func (rb *Bitmap) IsEmpty() bool {
- return rb.highlowcontainer.size() == 0
- }
- // GetCardinality returns the number of integers contained in the bitmap
- func (rb *Bitmap) GetCardinality() uint64 {
- size := uint64(0)
- for _, c := range rb.highlowcontainer.containers {
- size += uint64(c.getCardinality())
- }
- return size
- }
- // Rank returns the number of integers that are smaller or equal to x (Rank(infinity) would be GetCardinality()).
- // If you pass the smallest value, you get the value 1. If you pass a value that is smaller than the smallest
- // value, you get 0. Note that this function differs in convention from the Select function since it
- // return 1 and not 0 on the smallest value.
- func (rb *Bitmap) Rank(x uint32) uint64 {
- size := uint64(0)
- for i := 0; i < rb.highlowcontainer.size(); i++ {
- key := rb.highlowcontainer.getKeyAtIndex(i)
- if key > highbits(x) {
- return size
- }
- if key < highbits(x) {
- size += uint64(rb.highlowcontainer.getContainerAtIndex(i).getCardinality())
- } else {
- return size + uint64(rb.highlowcontainer.getContainerAtIndex(i).rank(lowbits(x)))
- }
- }
- return size
- }
- // Select returns the xth integer in the bitmap. If you pass 0, you get
- // the smallest element. Note that this function differs in convention from
- // the Rank function which returns 1 on the smallest value.
- func (rb *Bitmap) Select(x uint32) (uint32, error) {
- remaining := x
- for i := 0; i < rb.highlowcontainer.size(); i++ {
- c := rb.highlowcontainer.getContainerAtIndex(i)
- card := uint32(c.getCardinality())
- if remaining >= card {
- remaining -= card
- } else {
- key := rb.highlowcontainer.getKeyAtIndex(i)
- return uint32(key)<<16 + uint32(c.selectInt(uint16(remaining))), nil
- }
- }
- return 0, fmt.Errorf("can't find %dth integer in a bitmap with only %d items", x, rb.GetCardinality())
- }
- // And computes the intersection between two bitmaps and stores the result in the current bitmap
- func (rb *Bitmap) And(x2 *Bitmap) {
- pos1 := 0
- pos2 := 0
- intersectionsize := 0
- length1 := rb.highlowcontainer.size()
- length2 := x2.highlowcontainer.size()
- main:
- for {
- if pos1 < length1 && pos2 < length2 {
- s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
- s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
- for {
- if s1 == s2 {
- c1 := rb.highlowcontainer.getWritableContainerAtIndex(pos1)
- c2 := x2.highlowcontainer.getContainerAtIndex(pos2)
- diff := c1.iand(c2)
- if !diff.isEmpty() {
- rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, diff, false)
- intersectionsize++
- }
- pos1++
- pos2++
- if (pos1 == length1) || (pos2 == length2) {
- break main
- }
- s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- } else if s1 < s2 {
- pos1 = rb.highlowcontainer.advanceUntil(s2, pos1)
- if pos1 == length1 {
- break main
- }
- s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
- } else { //s1 > s2
- pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
- if pos2 == length2 {
- break main
- }
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- }
- }
- } else {
- break
- }
- }
- rb.highlowcontainer.resize(intersectionsize)
- }
- // OrCardinality returns the cardinality of the union between two bitmaps, bitmaps are not modified
- func (rb *Bitmap) OrCardinality(x2 *Bitmap) uint64 {
- pos1 := 0
- pos2 := 0
- length1 := rb.highlowcontainer.size()
- length2 := x2.highlowcontainer.size()
- answer := uint64(0)
- main:
- for {
- if (pos1 < length1) && (pos2 < length2) {
- s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
- s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
- for {
- if s1 < s2 {
- answer += uint64(rb.highlowcontainer.getContainerAtIndex(pos1).getCardinality())
- pos1++
- if pos1 == length1 {
- break main
- }
- s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
- } else if s1 > s2 {
- answer += uint64(x2.highlowcontainer.getContainerAtIndex(pos2).getCardinality())
- pos2++
- if pos2 == length2 {
- break main
- }
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- } else {
- // TODO: could be faster if we did not have to materialize the container
- answer += uint64(rb.highlowcontainer.getContainerAtIndex(pos1).or(x2.highlowcontainer.getContainerAtIndex(pos2)).getCardinality())
- pos1++
- pos2++
- if (pos1 == length1) || (pos2 == length2) {
- break main
- }
- s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- }
- }
- } else {
- break
- }
- }
- for ; pos1 < length1; pos1++ {
- answer += uint64(rb.highlowcontainer.getContainerAtIndex(pos1).getCardinality())
- }
- for ; pos2 < length2; pos2++ {
- answer += uint64(x2.highlowcontainer.getContainerAtIndex(pos2).getCardinality())
- }
- return answer
- }
- // AndCardinality returns the cardinality of the intersection between two bitmaps, bitmaps are not modified
- func (rb *Bitmap) AndCardinality(x2 *Bitmap) uint64 {
- pos1 := 0
- pos2 := 0
- answer := uint64(0)
- length1 := rb.highlowcontainer.size()
- length2 := x2.highlowcontainer.size()
- main:
- for {
- if pos1 < length1 && pos2 < length2 {
- s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
- s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
- for {
- if s1 == s2 {
- c1 := rb.highlowcontainer.getContainerAtIndex(pos1)
- c2 := x2.highlowcontainer.getContainerAtIndex(pos2)
- answer += uint64(c1.andCardinality(c2))
- pos1++
- pos2++
- if (pos1 == length1) || (pos2 == length2) {
- break main
- }
- s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- } else if s1 < s2 {
- pos1 = rb.highlowcontainer.advanceUntil(s2, pos1)
- if pos1 == length1 {
- break main
- }
- s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
- } else { //s1 > s2
- pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
- if pos2 == length2 {
- break main
- }
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- }
- }
- } else {
- break
- }
- }
- return answer
- }
- // IntersectsWithInterval checks whether a bitmap 'rb' and an open interval '[x,y)' intersect.
- func (rb *Bitmap) IntersectsWithInterval(x, y uint64) bool {
- if x >= y {
- return false
- }
- if x > MaxUint32 {
- return false
- }
- it := intIterator{}
- it.Initialize(rb)
- it.AdvanceIfNeeded(uint32(x))
- if !it.HasNext() {
- return false
- }
- if uint64(it.Next()) >= y {
- return false
- }
- return true
- }
- // Intersects checks whether two bitmap intersects, bitmaps are not modified
- func (rb *Bitmap) Intersects(x2 *Bitmap) bool {
- pos1 := 0
- pos2 := 0
- length1 := rb.highlowcontainer.size()
- length2 := x2.highlowcontainer.size()
- main:
- for {
- if pos1 < length1 && pos2 < length2 {
- s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
- s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
- for {
- if s1 == s2 {
- c1 := rb.highlowcontainer.getContainerAtIndex(pos1)
- c2 := x2.highlowcontainer.getContainerAtIndex(pos2)
- if c1.intersects(c2) {
- return true
- }
- pos1++
- pos2++
- if (pos1 == length1) || (pos2 == length2) {
- break main
- }
- s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- } else if s1 < s2 {
- pos1 = rb.highlowcontainer.advanceUntil(s2, pos1)
- if pos1 == length1 {
- break main
- }
- s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
- } else { //s1 > s2
- pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
- if pos2 == length2 {
- break main
- }
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- }
- }
- } else {
- break
- }
- }
- return false
- }
- // Xor computes the symmetric difference between two bitmaps and stores the result in the current bitmap
- func (rb *Bitmap) Xor(x2 *Bitmap) {
- pos1 := 0
- pos2 := 0
- length1 := rb.highlowcontainer.size()
- length2 := x2.highlowcontainer.size()
- for {
- if (pos1 < length1) && (pos2 < length2) {
- s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
- s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
- if s1 < s2 {
- pos1 = rb.highlowcontainer.advanceUntil(s2, pos1)
- if pos1 == length1 {
- break
- }
- } else if s1 > s2 {
- c := x2.highlowcontainer.getWritableContainerAtIndex(pos2)
- rb.highlowcontainer.insertNewKeyValueAt(pos1, x2.highlowcontainer.getKeyAtIndex(pos2), c)
- length1++
- pos1++
- pos2++
- } else {
- // TODO: couple be computed in-place for reduced memory usage
- c := rb.highlowcontainer.getContainerAtIndex(pos1).xor(x2.highlowcontainer.getContainerAtIndex(pos2))
- if !c.isEmpty() {
- rb.highlowcontainer.setContainerAtIndex(pos1, c)
- pos1++
- } else {
- rb.highlowcontainer.removeAtIndex(pos1)
- length1--
- }
- pos2++
- }
- } else {
- break
- }
- }
- if pos1 == length1 {
- rb.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
- }
- }
- // Or computes the union between two bitmaps and stores the result in the current bitmap
- func (rb *Bitmap) Or(x2 *Bitmap) {
- pos1 := 0
- pos2 := 0
- length1 := rb.highlowcontainer.size()
- length2 := x2.highlowcontainer.size()
- main:
- for (pos1 < length1) && (pos2 < length2) {
- s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
- s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
- for {
- if s1 < s2 {
- pos1++
- if pos1 == length1 {
- break main
- }
- s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
- } else if s1 > s2 {
- rb.highlowcontainer.insertNewKeyValueAt(pos1, s2, x2.highlowcontainer.getContainerAtIndex(pos2).clone())
- pos1++
- length1++
- pos2++
- if pos2 == length2 {
- break main
- }
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- } else {
- rb.highlowcontainer.replaceKeyAndContainerAtIndex(pos1, s1, rb.highlowcontainer.getUnionedWritableContainer(pos1, x2.highlowcontainer.getContainerAtIndex(pos2)), false)
- pos1++
- pos2++
- if (pos1 == length1) || (pos2 == length2) {
- break main
- }
- s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- }
- }
- }
- if pos1 == length1 {
- rb.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
- }
- }
- // AndNot computes the difference between two bitmaps and stores the result in the current bitmap
- func (rb *Bitmap) AndNot(x2 *Bitmap) {
- pos1 := 0
- pos2 := 0
- intersectionsize := 0
- length1 := rb.highlowcontainer.size()
- length2 := x2.highlowcontainer.size()
- main:
- for {
- if pos1 < length1 && pos2 < length2 {
- s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
- s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
- for {
- if s1 == s2 {
- c1 := rb.highlowcontainer.getWritableContainerAtIndex(pos1)
- c2 := x2.highlowcontainer.getContainerAtIndex(pos2)
- diff := c1.iandNot(c2)
- if !diff.isEmpty() {
- rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, diff, false)
- intersectionsize++
- }
- pos1++
- pos2++
- if (pos1 == length1) || (pos2 == length2) {
- break main
- }
- s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- } else if s1 < s2 {
- c1 := rb.highlowcontainer.getContainerAtIndex(pos1)
- mustCopyOnWrite := rb.highlowcontainer.needsCopyOnWrite(pos1)
- rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, c1, mustCopyOnWrite)
- intersectionsize++
- pos1++
- if pos1 == length1 {
- break main
- }
- s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
- } else { //s1 > s2
- pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
- if pos2 == length2 {
- break main
- }
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- }
- }
- } else {
- break
- }
- }
- // TODO:implement as a copy
- for pos1 < length1 {
- c1 := rb.highlowcontainer.getContainerAtIndex(pos1)
- s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
- mustCopyOnWrite := rb.highlowcontainer.needsCopyOnWrite(pos1)
- rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, c1, mustCopyOnWrite)
- intersectionsize++
- pos1++
- }
- rb.highlowcontainer.resize(intersectionsize)
- }
- // Or computes the union between two bitmaps and returns the result
- func Or(x1, x2 *Bitmap) *Bitmap {
- answer := NewBitmap()
- pos1 := 0
- pos2 := 0
- length1 := x1.highlowcontainer.size()
- length2 := x2.highlowcontainer.size()
- main:
- for (pos1 < length1) && (pos2 < length2) {
- s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
- s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
- for {
- if s1 < s2 {
- answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1)
- pos1++
- if pos1 == length1 {
- break main
- }
- s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
- } else if s1 > s2 {
- answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2)
- pos2++
- if pos2 == length2 {
- break main
- }
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- } else {
- answer.highlowcontainer.appendContainer(s1, x1.highlowcontainer.getContainerAtIndex(pos1).or(x2.highlowcontainer.getContainerAtIndex(pos2)), false)
- pos1++
- pos2++
- if (pos1 == length1) || (pos2 == length2) {
- break main
- }
- s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- }
- }
- }
- if pos1 == length1 {
- answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
- } else if pos2 == length2 {
- answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1)
- }
- return answer
- }
- // And computes the intersection between two bitmaps and returns the result
- func And(x1, x2 *Bitmap) *Bitmap {
- answer := NewBitmap()
- pos1 := 0
- pos2 := 0
- length1 := x1.highlowcontainer.size()
- length2 := x2.highlowcontainer.size()
- main:
- for pos1 < length1 && pos2 < length2 {
- s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
- s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
- for {
- if s1 == s2 {
- C := x1.highlowcontainer.getContainerAtIndex(pos1)
- C = C.and(x2.highlowcontainer.getContainerAtIndex(pos2))
- if !C.isEmpty() {
- answer.highlowcontainer.appendContainer(s1, C, false)
- }
- pos1++
- pos2++
- if (pos1 == length1) || (pos2 == length2) {
- break main
- }
- s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- } else if s1 < s2 {
- pos1 = x1.highlowcontainer.advanceUntil(s2, pos1)
- if pos1 == length1 {
- break main
- }
- s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
- } else { // s1 > s2
- pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
- if pos2 == length2 {
- break main
- }
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- }
- }
- }
- return answer
- }
- // Xor computes the symmetric difference between two bitmaps and returns the result
- func Xor(x1, x2 *Bitmap) *Bitmap {
- answer := NewBitmap()
- pos1 := 0
- pos2 := 0
- length1 := x1.highlowcontainer.size()
- length2 := x2.highlowcontainer.size()
- for {
- if (pos1 < length1) && (pos2 < length2) {
- s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
- s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
- if s1 < s2 {
- answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1)
- pos1++
- } else if s1 > s2 {
- answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2)
- pos2++
- } else {
- c := x1.highlowcontainer.getContainerAtIndex(pos1).xor(x2.highlowcontainer.getContainerAtIndex(pos2))
- if !c.isEmpty() {
- answer.highlowcontainer.appendContainer(s1, c, false)
- }
- pos1++
- pos2++
- }
- } else {
- break
- }
- }
- if pos1 == length1 {
- answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
- } else if pos2 == length2 {
- answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1)
- }
- return answer
- }
- // AndNot computes the difference between two bitmaps and returns the result
- func AndNot(x1, x2 *Bitmap) *Bitmap {
- answer := NewBitmap()
- pos1 := 0
- pos2 := 0
- length1 := x1.highlowcontainer.size()
- length2 := x2.highlowcontainer.size()
- main:
- for {
- if pos1 < length1 && pos2 < length2 {
- s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
- s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
- for {
- if s1 < s2 {
- answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1)
- pos1++
- if pos1 == length1 {
- break main
- }
- s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
- } else if s1 == s2 {
- c1 := x1.highlowcontainer.getContainerAtIndex(pos1)
- c2 := x2.highlowcontainer.getContainerAtIndex(pos2)
- diff := c1.andNot(c2)
- if !diff.isEmpty() {
- answer.highlowcontainer.appendContainer(s1, diff, false)
- }
- pos1++
- pos2++
- if (pos1 == length1) || (pos2 == length2) {
- break main
- }
- s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- } else { //s1 > s2
- pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
- if pos2 == length2 {
- break main
- }
- s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
- }
- }
- } else {
- break
- }
- }
- if pos2 == length2 {
- answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1)
- }
- return answer
- }
- // AddMany add all of the values in dat
- func (rb *Bitmap) AddMany(dat []uint32) {
- if len(dat) == 0 {
- return
- }
- prev := dat[0]
- idx, c := rb.addwithptr(prev)
- for _, i := range dat[1:] {
- if highbits(prev) == highbits(i) {
- c = c.iaddReturnMinimized(lowbits(i))
- rb.highlowcontainer.setContainerAtIndex(idx, c)
- } else {
- idx, c = rb.addwithptr(i)
- }
- prev = i
- }
- }
- // BitmapOf generates a new bitmap filled with the specified integers
- func BitmapOf(dat ...uint32) *Bitmap {
- ans := NewBitmap()
- ans.AddMany(dat)
- return ans
- }
- // Flip negates the bits in the given range (i.e., [rangeStart,rangeEnd)), any integer present in this range and in the bitmap is removed,
- // and any integer present in the range and not in the bitmap is added.
- // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range
- // while uint64(0x100000000) cannot be represented as a 32-bit value.
- func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
- if rangeEnd > MaxUint32+1 {
- panic("rangeEnd > MaxUint32+1")
- }
- if rangeStart > MaxUint32+1 {
- panic("rangeStart > MaxUint32+1")
- }
- if rangeStart >= rangeEnd {
- return
- }
- hbStart := uint32(highbits(uint32(rangeStart)))
- lbStart := uint32(lowbits(uint32(rangeStart)))
- hbLast := uint32(highbits(uint32(rangeEnd - 1)))
- lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
- var max uint32 = maxLowBit
- for hb := hbStart; hb <= hbLast; hb++ {
- var containerStart uint32
- if hb == hbStart {
- containerStart = uint32(lbStart)
- }
- containerLast := max
- if hb == hbLast {
- containerLast = uint32(lbLast)
- }
- i := rb.highlowcontainer.getIndex(uint16(hb))
- if i >= 0 {
- c := rb.highlowcontainer.getWritableContainerAtIndex(i).inot(int(containerStart), int(containerLast)+1)
- if !c.isEmpty() {
- rb.highlowcontainer.setContainerAtIndex(i, c)
- } else {
- rb.highlowcontainer.removeAtIndex(i)
- }
- } else { // *think* the range of ones must never be
- // empty.
- rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast)))
- }
- }
- }
- // FlipInt calls Flip after casting the parameters (convenience method)
- func (rb *Bitmap) FlipInt(rangeStart, rangeEnd int) {
- rb.Flip(uint64(rangeStart), uint64(rangeEnd))
- }
- // AddRange adds the integers in [rangeStart, rangeEnd) to the bitmap.
- // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range
- // while uint64(0x100000000) cannot be represented as a 32-bit value.
- func (rb *Bitmap) AddRange(rangeStart, rangeEnd uint64) {
- if rangeStart >= rangeEnd {
- return
- }
- if rangeEnd-1 > MaxUint32 {
- panic("rangeEnd-1 > MaxUint32")
- }
- hbStart := uint32(highbits(uint32(rangeStart)))
- lbStart := uint32(lowbits(uint32(rangeStart)))
- hbLast := uint32(highbits(uint32(rangeEnd - 1)))
- lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
- var max uint32 = maxLowBit
- for hb := hbStart; hb <= hbLast; hb++ {
- containerStart := uint32(0)
- if hb == hbStart {
- containerStart = lbStart
- }
- containerLast := max
- if hb == hbLast {
- containerLast = lbLast
- }
- i := rb.highlowcontainer.getIndex(uint16(hb))
- if i >= 0 {
- c := rb.highlowcontainer.getWritableContainerAtIndex(i).iaddRange(int(containerStart), int(containerLast)+1)
- rb.highlowcontainer.setContainerAtIndex(i, c)
- } else { // *think* the range of ones must never be
- // empty.
- rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast)))
- }
- }
- }
- // RemoveRange removes the integers in [rangeStart, rangeEnd) from the bitmap.
- // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range
- // while uint64(0x100000000) cannot be represented as a 32-bit value.
- func (rb *Bitmap) RemoveRange(rangeStart, rangeEnd uint64) {
- if rangeStart >= rangeEnd {
- return
- }
- if rangeEnd-1 > MaxUint32 {
- // logically, we should assume that the user wants to
- // remove all values from rangeStart to infinity
- // see https://github.com/RoaringBitmap/roaring/issues/141
- rangeEnd = uint64(0x100000000)
- }
- hbStart := uint32(highbits(uint32(rangeStart)))
- lbStart := uint32(lowbits(uint32(rangeStart)))
- hbLast := uint32(highbits(uint32(rangeEnd - 1)))
- lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
- var max uint32 = maxLowBit
- if hbStart == hbLast {
- i := rb.highlowcontainer.getIndex(uint16(hbStart))
- if i < 0 {
- return
- }
- c := rb.highlowcontainer.getWritableContainerAtIndex(i).iremoveRange(int(lbStart), int(lbLast+1))
- if !c.isEmpty() {
- rb.highlowcontainer.setContainerAtIndex(i, c)
- } else {
- rb.highlowcontainer.removeAtIndex(i)
- }
- return
- }
- ifirst := rb.highlowcontainer.getIndex(uint16(hbStart))
- ilast := rb.highlowcontainer.getIndex(uint16(hbLast))
- if ifirst >= 0 {
- if lbStart != 0 {
- c := rb.highlowcontainer.getWritableContainerAtIndex(ifirst).iremoveRange(int(lbStart), int(max+1))
- if !c.isEmpty() {
- rb.highlowcontainer.setContainerAtIndex(ifirst, c)
- ifirst++
- }
- }
- } else {
- ifirst = -ifirst - 1
- }
- if ilast >= 0 {
- if lbLast != max {
- c := rb.highlowcontainer.getWritableContainerAtIndex(ilast).iremoveRange(int(0), int(lbLast+1))
- if !c.isEmpty() {
- rb.highlowcontainer.setContainerAtIndex(ilast, c)
- } else {
- ilast++
- }
- } else {
- ilast++
- }
- } else {
- ilast = -ilast - 1
- }
- rb.highlowcontainer.removeIndexRange(ifirst, ilast)
- }
- // Flip negates the bits in the given range (i.e., [rangeStart,rangeEnd)), any integer present in this range and in the bitmap is removed,
- // and any integer present in the range and not in the bitmap is added, a new bitmap is returned leaving
- // the current bitmap unchanged.
- // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range
- // while uint64(0x100000000) cannot be represented as a 32-bit value.
- func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap {
- if rangeStart >= rangeEnd {
- return bm.Clone()
- }
- if rangeStart > MaxUint32 {
- panic("rangeStart > MaxUint32")
- }
- if rangeEnd-1 > MaxUint32 {
- panic("rangeEnd-1 > MaxUint32")
- }
- answer := NewBitmap()
- hbStart := uint32(highbits(uint32(rangeStart)))
- lbStart := uint32(lowbits(uint32(rangeStart)))
- hbLast := uint32(highbits(uint32(rangeEnd - 1)))
- lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
- // copy the containers before the active area
- answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, uint16(hbStart))
- var max uint32 = maxLowBit
- for hb := hbStart; hb <= hbLast; hb++ {
- var containerStart uint32
- if hb == hbStart {
- containerStart = uint32(lbStart)
- }
- containerLast := max
- if hb == hbLast {
- containerLast = uint32(lbLast)
- }
- i := bm.highlowcontainer.getIndex(uint16(hb))
- j := answer.highlowcontainer.getIndex(uint16(hb))
- if i >= 0 {
- c := bm.highlowcontainer.getContainerAtIndex(i).not(int(containerStart), int(containerLast)+1)
- if !c.isEmpty() {
- answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb), c)
- }
- } else { // *think* the range of ones must never be
- // empty.
- answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb),
- rangeOfOnes(int(containerStart), int(containerLast)))
- }
- }
- // copy the containers after the active area.
- answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, uint16(hbLast))
- return answer
- }
- // SetCopyOnWrite sets this bitmap to use copy-on-write so that copies are fast and memory conscious
- // if the parameter is true, otherwise we leave the default where hard copies are made
- // (copy-on-write requires extra care in a threaded context).
- // Calling SetCopyOnWrite(true) on a bitmap created with FromBuffer is unsafe.
- func (rb *Bitmap) SetCopyOnWrite(val bool) {
- rb.highlowcontainer.copyOnWrite = val
- }
- // GetCopyOnWrite gets this bitmap's copy-on-write property
- func (rb *Bitmap) GetCopyOnWrite() (val bool) {
- return rb.highlowcontainer.copyOnWrite
- }
- // CloneCopyOnWriteContainers clones all containers which have
- // needCopyOnWrite set to true.
- // This can be used to make sure it is safe to munmap a []byte
- // that the roaring array may still have a reference to, after
- // calling FromBuffer.
- // More generally this function is useful if you call FromBuffer
- // to construct a bitmap with a backing array buf
- // and then later discard the buf array. Note that you should call
- // CloneCopyOnWriteContainers on all bitmaps that were derived
- // from the 'FromBuffer' bitmap since they map have dependencies
- // on the buf array as well.
- func (rb *Bitmap) CloneCopyOnWriteContainers() {
- rb.highlowcontainer.cloneCopyOnWriteContainers()
- }
- // FlipInt calls Flip after casting the parameters (convenience method)
- func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap {
- return Flip(bm, uint64(rangeStart), uint64(rangeEnd))
- }
- // Statistics provides details on the container types in use.
- type Statistics struct {
- Cardinality uint64
- Containers uint64
- ArrayContainers uint64
- ArrayContainerBytes uint64
- ArrayContainerValues uint64
- BitmapContainers uint64
- BitmapContainerBytes uint64
- BitmapContainerValues uint64
- RunContainers uint64
- RunContainerBytes uint64
- RunContainerValues uint64
- }
- // Stats returns details on container type usage in a Statistics struct.
- func (rb *Bitmap) Stats() Statistics {
- stats := Statistics{}
- stats.Containers = uint64(len(rb.highlowcontainer.containers))
- for _, c := range rb.highlowcontainer.containers {
- stats.Cardinality += uint64(c.getCardinality())
- switch c.(type) {
- case *arrayContainer:
- stats.ArrayContainers++
- stats.ArrayContainerBytes += uint64(c.getSizeInBytes())
- stats.ArrayContainerValues += uint64(c.getCardinality())
- case *bitmapContainer:
- stats.BitmapContainers++
- stats.BitmapContainerBytes += uint64(c.getSizeInBytes())
- stats.BitmapContainerValues += uint64(c.getCardinality())
- case *runContainer16:
- stats.RunContainers++
- stats.RunContainerBytes += uint64(c.getSizeInBytes())
- stats.RunContainerValues += uint64(c.getCardinality())
- }
- }
- return stats
- }
|