runcontainer.go 66 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631
  1. package roaring
  2. //
  3. // Copyright (c) 2016 by the roaring authors.
  4. // Licensed under the Apache License, Version 2.0.
  5. //
  6. // We derive a few lines of code from the sort.Search
  7. // function in the golang standard library. That function
  8. // is Copyright 2009 The Go Authors, and licensed
  9. // under the following BSD-style license.
  10. /*
  11. Copyright (c) 2009 The Go Authors. All rights reserved.
  12. Redistribution and use in source and binary forms, with or without
  13. modification, are permitted provided that the following conditions are
  14. met:
  15. * Redistributions of source code must retain the above copyright
  16. notice, this list of conditions and the following disclaimer.
  17. * Redistributions in binary form must reproduce the above
  18. copyright notice, this list of conditions and the following disclaimer
  19. in the documentation and/or other materials provided with the
  20. distribution.
  21. * Neither the name of Google Inc. nor the names of its
  22. contributors may be used to endorse or promote products derived from
  23. this software without specific prior written permission.
  24. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  25. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  26. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  27. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  28. OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  29. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  30. LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  31. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  32. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  33. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  34. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  35. */
  36. import (
  37. "fmt"
  38. "sort"
  39. "unsafe"
  40. )
  41. // runContainer16 does run-length encoding of sets of
  42. // uint16 integers.
  43. type runContainer16 struct {
  44. iv []interval16
  45. }
  46. // interval16 is the internal to runContainer16
  47. // structure that maintains the individual [start, last]
  48. // closed intervals.
  49. type interval16 struct {
  50. start uint16
  51. length uint16 // length minus 1
  52. }
  53. func newInterval16Range(start, last uint16) interval16 {
  54. if last < start {
  55. panic(fmt.Sprintf("last (%d) cannot be smaller than start (%d)", last, start))
  56. }
  57. return interval16{
  58. start,
  59. last - start,
  60. }
  61. }
  62. // runlen returns the count of integers in the interval.
  63. func (iv interval16) runlen() int {
  64. return int(iv.length) + 1
  65. }
  66. func (iv interval16) last() uint16 {
  67. return iv.start + iv.length
  68. }
  69. // String produces a human viewable string of the contents.
  70. func (iv interval16) String() string {
  71. return fmt.Sprintf("[%d, %d]", iv.start, iv.length)
  72. }
  73. func ivalString16(iv []interval16) string {
  74. var s string
  75. var j int
  76. var p interval16
  77. for j, p = range iv {
  78. s += fmt.Sprintf("%v:[%d, %d], ", j, p.start, p.last())
  79. }
  80. return s
  81. }
  82. // String produces a human viewable string of the contents.
  83. func (rc *runContainer16) String() string {
  84. if len(rc.iv) == 0 {
  85. return "runContainer16{}"
  86. }
  87. is := ivalString16(rc.iv)
  88. return `runContainer16{` + is + `}`
  89. }
  90. // uint16Slice is a sort.Sort convenience method
  91. type uint16Slice []uint16
  92. // Len returns the length of p.
  93. func (p uint16Slice) Len() int { return len(p) }
  94. // Less returns p[i] < p[j]
  95. func (p uint16Slice) Less(i, j int) bool { return p[i] < p[j] }
  96. // Swap swaps elements i and j.
  97. func (p uint16Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
  98. // addHelper helps build a runContainer16.
  99. type addHelper16 struct {
  100. runstart uint16
  101. runlen uint16
  102. actuallyAdded uint16
  103. m []interval16
  104. rc *runContainer16
  105. }
  106. func (ah *addHelper16) storeIval(runstart, runlen uint16) {
  107. mi := interval16{start: runstart, length: runlen}
  108. ah.m = append(ah.m, mi)
  109. }
  110. func (ah *addHelper16) add(cur, prev uint16, i int) {
  111. if cur == prev+1 {
  112. ah.runlen++
  113. ah.actuallyAdded++
  114. } else {
  115. if cur < prev {
  116. panic(fmt.Sprintf("newRunContainer16FromVals sees "+
  117. "unsorted vals; vals[%v]=cur=%v < prev=%v. Sort your vals"+
  118. " before calling us with alreadySorted == true.", i, cur, prev))
  119. }
  120. if cur == prev {
  121. // ignore duplicates
  122. } else {
  123. ah.actuallyAdded++
  124. ah.storeIval(ah.runstart, ah.runlen)
  125. ah.runstart = cur
  126. ah.runlen = 0
  127. }
  128. }
  129. }
  130. // newRunContainerRange makes a new container made of just the specified closed interval [rangestart,rangelast]
  131. func newRunContainer16Range(rangestart uint16, rangelast uint16) *runContainer16 {
  132. rc := &runContainer16{}
  133. rc.iv = append(rc.iv, newInterval16Range(rangestart, rangelast))
  134. return rc
  135. }
  136. // newRunContainer16FromVals makes a new container from vals.
  137. //
  138. // For efficiency, vals should be sorted in ascending order.
  139. // Ideally vals should not contain duplicates, but we detect and
  140. // ignore them. If vals is already sorted in ascending order, then
  141. // pass alreadySorted = true. Otherwise, for !alreadySorted,
  142. // we will sort vals before creating a runContainer16 of them.
  143. // We sort the original vals, so this will change what the
  144. // caller sees in vals as a side effect.
  145. func newRunContainer16FromVals(alreadySorted bool, vals ...uint16) *runContainer16 {
  146. // keep this in sync with newRunContainer16FromArray below
  147. rc := &runContainer16{}
  148. ah := addHelper16{rc: rc}
  149. if !alreadySorted {
  150. sort.Sort(uint16Slice(vals))
  151. }
  152. n := len(vals)
  153. var cur, prev uint16
  154. switch {
  155. case n == 0:
  156. // nothing more
  157. case n == 1:
  158. ah.m = append(ah.m, newInterval16Range(vals[0], vals[0]))
  159. ah.actuallyAdded++
  160. default:
  161. ah.runstart = vals[0]
  162. ah.actuallyAdded++
  163. for i := 1; i < n; i++ {
  164. prev = vals[i-1]
  165. cur = vals[i]
  166. ah.add(cur, prev, i)
  167. }
  168. ah.storeIval(ah.runstart, ah.runlen)
  169. }
  170. rc.iv = ah.m
  171. return rc
  172. }
  173. // newRunContainer16FromBitmapContainer makes a new run container from bc,
  174. // somewhat efficiently. For reference, see the Java
  175. // https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/RunContainer.java#L145-L192
  176. func newRunContainer16FromBitmapContainer(bc *bitmapContainer) *runContainer16 {
  177. rc := &runContainer16{}
  178. nbrRuns := bc.numberOfRuns()
  179. if nbrRuns == 0 {
  180. return rc
  181. }
  182. rc.iv = make([]interval16, nbrRuns)
  183. longCtr := 0 // index of current long in bitmap
  184. curWord := bc.bitmap[0] // its value
  185. runCount := 0
  186. for {
  187. // potentially multiword advance to first 1 bit
  188. for curWord == 0 && longCtr < len(bc.bitmap)-1 {
  189. longCtr++
  190. curWord = bc.bitmap[longCtr]
  191. }
  192. if curWord == 0 {
  193. // wrap up, no more runs
  194. return rc
  195. }
  196. localRunStart := countTrailingZeros(curWord)
  197. runStart := localRunStart + 64*longCtr
  198. // stuff 1s into number's LSBs
  199. curWordWith1s := curWord | (curWord - 1)
  200. // find the next 0, potentially in a later word
  201. runEnd := 0
  202. for curWordWith1s == maxWord && longCtr < len(bc.bitmap)-1 {
  203. longCtr++
  204. curWordWith1s = bc.bitmap[longCtr]
  205. }
  206. if curWordWith1s == maxWord {
  207. // a final unterminated run of 1s
  208. runEnd = wordSizeInBits + longCtr*64
  209. rc.iv[runCount].start = uint16(runStart)
  210. rc.iv[runCount].length = uint16(runEnd) - uint16(runStart) - 1
  211. return rc
  212. }
  213. localRunEnd := countTrailingZeros(^curWordWith1s)
  214. runEnd = localRunEnd + longCtr*64
  215. rc.iv[runCount].start = uint16(runStart)
  216. rc.iv[runCount].length = uint16(runEnd) - 1 - uint16(runStart)
  217. runCount++
  218. // now, zero out everything right of runEnd.
  219. curWord = curWordWith1s & (curWordWith1s + 1)
  220. // We've lathered and rinsed, so repeat...
  221. }
  222. }
  223. //
  224. // newRunContainer16FromArray populates a new
  225. // runContainer16 from the contents of arr.
  226. //
  227. func newRunContainer16FromArray(arr *arrayContainer) *runContainer16 {
  228. // keep this in sync with newRunContainer16FromVals above
  229. rc := &runContainer16{}
  230. ah := addHelper16{rc: rc}
  231. n := arr.getCardinality()
  232. var cur, prev uint16
  233. switch {
  234. case n == 0:
  235. // nothing more
  236. case n == 1:
  237. ah.m = append(ah.m, newInterval16Range(arr.content[0], arr.content[0]))
  238. ah.actuallyAdded++
  239. default:
  240. ah.runstart = arr.content[0]
  241. ah.actuallyAdded++
  242. for i := 1; i < n; i++ {
  243. prev = arr.content[i-1]
  244. cur = arr.content[i]
  245. ah.add(cur, prev, i)
  246. }
  247. ah.storeIval(ah.runstart, ah.runlen)
  248. }
  249. rc.iv = ah.m
  250. return rc
  251. }
  252. // set adds the integers in vals to the set. Vals
  253. // must be sorted in increasing order; if not, you should set
  254. // alreadySorted to false, and we will sort them in place for you.
  255. // (Be aware of this side effect -- it will affect the callers
  256. // view of vals).
  257. //
  258. // If you have a small number of additions to an already
  259. // big runContainer16, calling Add() may be faster.
  260. func (rc *runContainer16) set(alreadySorted bool, vals ...uint16) {
  261. rc2 := newRunContainer16FromVals(alreadySorted, vals...)
  262. un := rc.union(rc2)
  263. rc.iv = un.iv
  264. }
  265. // canMerge returns true iff the intervals
  266. // a and b either overlap or they are
  267. // contiguous and so can be merged into
  268. // a single interval.
  269. func canMerge16(a, b interval16) bool {
  270. if int(a.last())+1 < int(b.start) {
  271. return false
  272. }
  273. return int(b.last())+1 >= int(a.start)
  274. }
  275. // haveOverlap differs from canMerge in that
  276. // it tells you if the intersection of a
  277. // and b would contain an element (otherwise
  278. // it would be the empty set, and we return
  279. // false).
  280. func haveOverlap16(a, b interval16) bool {
  281. if int(a.last())+1 <= int(b.start) {
  282. return false
  283. }
  284. return int(b.last())+1 > int(a.start)
  285. }
  286. // mergeInterval16s joins a and b into a
  287. // new interval, and panics if it cannot.
  288. func mergeInterval16s(a, b interval16) (res interval16) {
  289. if !canMerge16(a, b) {
  290. panic(fmt.Sprintf("cannot merge %#v and %#v", a, b))
  291. }
  292. if b.start < a.start {
  293. res.start = b.start
  294. } else {
  295. res.start = a.start
  296. }
  297. if b.last() > a.last() {
  298. res.length = b.last() - res.start
  299. } else {
  300. res.length = a.last() - res.start
  301. }
  302. return
  303. }
  304. // intersectInterval16s returns the intersection
  305. // of a and b. The isEmpty flag will be true if
  306. // a and b were disjoint.
  307. func intersectInterval16s(a, b interval16) (res interval16, isEmpty bool) {
  308. if !haveOverlap16(a, b) {
  309. isEmpty = true
  310. return
  311. }
  312. if b.start > a.start {
  313. res.start = b.start
  314. } else {
  315. res.start = a.start
  316. }
  317. bEnd := b.last()
  318. aEnd := a.last()
  319. var resEnd uint16
  320. if bEnd < aEnd {
  321. resEnd = bEnd
  322. } else {
  323. resEnd = aEnd
  324. }
  325. res.length = resEnd - res.start
  326. return
  327. }
  328. // union merges two runContainer16s, producing
  329. // a new runContainer16 with the union of rc and b.
  330. func (rc *runContainer16) union(b *runContainer16) *runContainer16 {
  331. // rc is also known as 'a' here, but golint insisted we
  332. // call it rc for consistency with the rest of the methods.
  333. var m []interval16
  334. alim := int(len(rc.iv))
  335. blim := int(len(b.iv))
  336. var na int // next from a
  337. var nb int // next from b
  338. // merged holds the current merge output, which might
  339. // get additional merges before being appended to m.
  340. var merged interval16
  341. var mergedUsed bool // is merged being used at the moment?
  342. var cura interval16 // currently considering this interval16 from a
  343. var curb interval16 // currently considering this interval16 from b
  344. pass := 0
  345. for na < alim && nb < blim {
  346. pass++
  347. cura = rc.iv[na]
  348. curb = b.iv[nb]
  349. if mergedUsed {
  350. mergedUpdated := false
  351. if canMerge16(cura, merged) {
  352. merged = mergeInterval16s(cura, merged)
  353. na = rc.indexOfIntervalAtOrAfter(int(merged.last())+1, na+1)
  354. mergedUpdated = true
  355. }
  356. if canMerge16(curb, merged) {
  357. merged = mergeInterval16s(curb, merged)
  358. nb = b.indexOfIntervalAtOrAfter(int(merged.last())+1, nb+1)
  359. mergedUpdated = true
  360. }
  361. if !mergedUpdated {
  362. // we know that merged is disjoint from cura and curb
  363. m = append(m, merged)
  364. mergedUsed = false
  365. }
  366. continue
  367. } else {
  368. // !mergedUsed
  369. if !canMerge16(cura, curb) {
  370. if cura.start < curb.start {
  371. m = append(m, cura)
  372. na++
  373. } else {
  374. m = append(m, curb)
  375. nb++
  376. }
  377. } else {
  378. merged = mergeInterval16s(cura, curb)
  379. mergedUsed = true
  380. na = rc.indexOfIntervalAtOrAfter(int(merged.last())+1, na+1)
  381. nb = b.indexOfIntervalAtOrAfter(int(merged.last())+1, nb+1)
  382. }
  383. }
  384. }
  385. var aDone, bDone bool
  386. if na >= alim {
  387. aDone = true
  388. }
  389. if nb >= blim {
  390. bDone = true
  391. }
  392. // finish by merging anything remaining into merged we can:
  393. if mergedUsed {
  394. if !aDone {
  395. aAdds:
  396. for na < alim {
  397. cura = rc.iv[na]
  398. if canMerge16(cura, merged) {
  399. merged = mergeInterval16s(cura, merged)
  400. na = rc.indexOfIntervalAtOrAfter(int(merged.last())+1, na+1)
  401. } else {
  402. break aAdds
  403. }
  404. }
  405. }
  406. if !bDone {
  407. bAdds:
  408. for nb < blim {
  409. curb = b.iv[nb]
  410. if canMerge16(curb, merged) {
  411. merged = mergeInterval16s(curb, merged)
  412. nb = b.indexOfIntervalAtOrAfter(int(merged.last())+1, nb+1)
  413. } else {
  414. break bAdds
  415. }
  416. }
  417. }
  418. m = append(m, merged)
  419. }
  420. if na < alim {
  421. m = append(m, rc.iv[na:]...)
  422. }
  423. if nb < blim {
  424. m = append(m, b.iv[nb:]...)
  425. }
  426. res := &runContainer16{iv: m}
  427. return res
  428. }
  429. // unionCardinality returns the cardinality of the merger of two runContainer16s, the union of rc and b.
  430. func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
  431. // rc is also known as 'a' here, but golint insisted we
  432. // call it rc for consistency with the rest of the methods.
  433. answer := uint(0)
  434. alim := int(len(rc.iv))
  435. blim := int(len(b.iv))
  436. var na int // next from a
  437. var nb int // next from b
  438. // merged holds the current merge output, which might
  439. // get additional merges before being appended to m.
  440. var merged interval16
  441. var mergedUsed bool // is merged being used at the moment?
  442. var cura interval16 // currently considering this interval16 from a
  443. var curb interval16 // currently considering this interval16 from b
  444. pass := 0
  445. for na < alim && nb < blim {
  446. pass++
  447. cura = rc.iv[na]
  448. curb = b.iv[nb]
  449. if mergedUsed {
  450. mergedUpdated := false
  451. if canMerge16(cura, merged) {
  452. merged = mergeInterval16s(cura, merged)
  453. na = rc.indexOfIntervalAtOrAfter(int(merged.last())+1, na+1)
  454. mergedUpdated = true
  455. }
  456. if canMerge16(curb, merged) {
  457. merged = mergeInterval16s(curb, merged)
  458. nb = b.indexOfIntervalAtOrAfter(int(merged.last())+1, nb+1)
  459. mergedUpdated = true
  460. }
  461. if !mergedUpdated {
  462. // we know that merged is disjoint from cura and curb
  463. //m = append(m, merged)
  464. answer += uint(merged.last()) - uint(merged.start) + 1
  465. mergedUsed = false
  466. }
  467. continue
  468. } else {
  469. // !mergedUsed
  470. if !canMerge16(cura, curb) {
  471. if cura.start < curb.start {
  472. answer += uint(cura.last()) - uint(cura.start) + 1
  473. //m = append(m, cura)
  474. na++
  475. } else {
  476. answer += uint(curb.last()) - uint(curb.start) + 1
  477. //m = append(m, curb)
  478. nb++
  479. }
  480. } else {
  481. merged = mergeInterval16s(cura, curb)
  482. mergedUsed = true
  483. na = rc.indexOfIntervalAtOrAfter(int(merged.last())+1, na+1)
  484. nb = b.indexOfIntervalAtOrAfter(int(merged.last())+1, nb+1)
  485. }
  486. }
  487. }
  488. var aDone, bDone bool
  489. if na >= alim {
  490. aDone = true
  491. }
  492. if nb >= blim {
  493. bDone = true
  494. }
  495. // finish by merging anything remaining into merged we can:
  496. if mergedUsed {
  497. if !aDone {
  498. aAdds:
  499. for na < alim {
  500. cura = rc.iv[na]
  501. if canMerge16(cura, merged) {
  502. merged = mergeInterval16s(cura, merged)
  503. na = rc.indexOfIntervalAtOrAfter(int(merged.last())+1, na+1)
  504. } else {
  505. break aAdds
  506. }
  507. }
  508. }
  509. if !bDone {
  510. bAdds:
  511. for nb < blim {
  512. curb = b.iv[nb]
  513. if canMerge16(curb, merged) {
  514. merged = mergeInterval16s(curb, merged)
  515. nb = b.indexOfIntervalAtOrAfter(int(merged.last())+1, nb+1)
  516. } else {
  517. break bAdds
  518. }
  519. }
  520. }
  521. //m = append(m, merged)
  522. answer += uint(merged.last()) - uint(merged.start) + 1
  523. }
  524. for _, r := range rc.iv[na:] {
  525. answer += uint(r.last()) - uint(r.start) + 1
  526. }
  527. for _, r := range b.iv[nb:] {
  528. answer += uint(r.last()) - uint(r.start) + 1
  529. }
  530. return answer
  531. }
  532. // indexOfIntervalAtOrAfter is a helper for union.
  533. func (rc *runContainer16) indexOfIntervalAtOrAfter(key int, startIndex int) int {
  534. w, already, _ := rc.searchRange(key, startIndex, 0)
  535. if already {
  536. return w
  537. }
  538. return w + 1
  539. }
  540. // intersect returns a new runContainer16 holding the
  541. // intersection of rc (also known as 'a') and b.
  542. func (rc *runContainer16) intersect(b *runContainer16) *runContainer16 {
  543. a := rc
  544. numa := int(len(a.iv))
  545. numb := int(len(b.iv))
  546. res := &runContainer16{}
  547. if numa == 0 || numb == 0 {
  548. return res
  549. }
  550. if numa == 1 && numb == 1 {
  551. if !haveOverlap16(a.iv[0], b.iv[0]) {
  552. return res
  553. }
  554. }
  555. var output []interval16
  556. var acuri int
  557. var bcuri int
  558. astart := int(a.iv[acuri].start)
  559. bstart := int(b.iv[bcuri].start)
  560. var intersection interval16
  561. var leftoverstart int
  562. var isOverlap, isLeftoverA, isLeftoverB bool
  563. var done bool
  564. toploop:
  565. for acuri < numa && bcuri < numb {
  566. isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection =
  567. intersectWithLeftover16(astart, int(a.iv[acuri].last()), bstart, int(b.iv[bcuri].last()))
  568. if !isOverlap {
  569. switch {
  570. case astart < bstart:
  571. acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart)
  572. if done {
  573. break toploop
  574. }
  575. astart = int(a.iv[acuri].start)
  576. case astart > bstart:
  577. bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart)
  578. if done {
  579. break toploop
  580. }
  581. bstart = int(b.iv[bcuri].start)
  582. }
  583. } else {
  584. // isOverlap
  585. output = append(output, intersection)
  586. switch {
  587. case isLeftoverA:
  588. // note that we change astart without advancing acuri,
  589. // since we need to capture any 2ndary intersections with a.iv[acuri]
  590. astart = leftoverstart
  591. bcuri++
  592. if bcuri >= numb {
  593. break toploop
  594. }
  595. bstart = int(b.iv[bcuri].start)
  596. case isLeftoverB:
  597. // note that we change bstart without advancing bcuri,
  598. // since we need to capture any 2ndary intersections with b.iv[bcuri]
  599. bstart = leftoverstart
  600. acuri++
  601. if acuri >= numa {
  602. break toploop
  603. }
  604. astart = int(a.iv[acuri].start)
  605. default:
  606. // neither had leftover, both completely consumed
  607. // advance to next a interval
  608. acuri++
  609. if acuri >= numa {
  610. break toploop
  611. }
  612. astart = int(a.iv[acuri].start)
  613. // advance to next b interval
  614. bcuri++
  615. if bcuri >= numb {
  616. break toploop
  617. }
  618. bstart = int(b.iv[bcuri].start)
  619. }
  620. }
  621. } // end for toploop
  622. if len(output) == 0 {
  623. return res
  624. }
  625. res.iv = output
  626. return res
  627. }
  628. // intersectCardinality returns the cardinality of the
  629. // intersection of rc (also known as 'a') and b.
  630. func (rc *runContainer16) intersectCardinality(b *runContainer16) int {
  631. answer := int(0)
  632. a := rc
  633. numa := int(len(a.iv))
  634. numb := int(len(b.iv))
  635. if numa == 0 || numb == 0 {
  636. return 0
  637. }
  638. if numa == 1 && numb == 1 {
  639. if !haveOverlap16(a.iv[0], b.iv[0]) {
  640. return 0
  641. }
  642. }
  643. var acuri int
  644. var bcuri int
  645. astart := int(a.iv[acuri].start)
  646. bstart := int(b.iv[bcuri].start)
  647. var intersection interval16
  648. var leftoverstart int
  649. var isOverlap, isLeftoverA, isLeftoverB bool
  650. var done bool
  651. pass := 0
  652. toploop:
  653. for acuri < numa && bcuri < numb {
  654. pass++
  655. isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection =
  656. intersectWithLeftover16(astart, int(a.iv[acuri].last()), bstart, int(b.iv[bcuri].last()))
  657. if !isOverlap {
  658. switch {
  659. case astart < bstart:
  660. acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart)
  661. if done {
  662. break toploop
  663. }
  664. astart = int(a.iv[acuri].start)
  665. case astart > bstart:
  666. bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart)
  667. if done {
  668. break toploop
  669. }
  670. bstart = int(b.iv[bcuri].start)
  671. }
  672. } else {
  673. // isOverlap
  674. answer += int(intersection.last()) - int(intersection.start) + 1
  675. switch {
  676. case isLeftoverA:
  677. // note that we change astart without advancing acuri,
  678. // since we need to capture any 2ndary intersections with a.iv[acuri]
  679. astart = leftoverstart
  680. bcuri++
  681. if bcuri >= numb {
  682. break toploop
  683. }
  684. bstart = int(b.iv[bcuri].start)
  685. case isLeftoverB:
  686. // note that we change bstart without advancing bcuri,
  687. // since we need to capture any 2ndary intersections with b.iv[bcuri]
  688. bstart = leftoverstart
  689. acuri++
  690. if acuri >= numa {
  691. break toploop
  692. }
  693. astart = int(a.iv[acuri].start)
  694. default:
  695. // neither had leftover, both completely consumed
  696. // advance to next a interval
  697. acuri++
  698. if acuri >= numa {
  699. break toploop
  700. }
  701. astart = int(a.iv[acuri].start)
  702. // advance to next b interval
  703. bcuri++
  704. if bcuri >= numb {
  705. break toploop
  706. }
  707. bstart = int(b.iv[bcuri].start)
  708. }
  709. }
  710. } // end for toploop
  711. return answer
  712. }
  713. // get returns true iff key is in the container.
  714. func (rc *runContainer16) contains(key uint16) bool {
  715. _, in, _ := rc.search(int(key))
  716. return in
  717. }
  718. // numIntervals returns the count of intervals in the container.
  719. func (rc *runContainer16) numIntervals() int {
  720. return len(rc.iv)
  721. }
  722. // searchRange returns alreadyPresent to indicate if the
  723. // key is already in one of our interval16s.
  724. //
  725. // If key is alreadyPresent, then whichInterval16 tells
  726. // you where.
  727. //
  728. // If key is not already present, then whichInterval16 is
  729. // set as follows:
  730. //
  731. // a) whichInterval16 == len(rc.iv)-1 if key is beyond our
  732. // last interval16 in rc.iv;
  733. //
  734. // b) whichInterval16 == -1 if key is before our first
  735. // interval16 in rc.iv;
  736. //
  737. // c) whichInterval16 is set to the minimum index of rc.iv
  738. // which comes strictly before the key;
  739. // so rc.iv[whichInterval16].last < key,
  740. // and if whichInterval16+1 exists, then key < rc.iv[whichInterval16+1].start
  741. // (Note that whichInterval16+1 won't exist when
  742. // whichInterval16 is the last interval.)
  743. //
  744. // runContainer16.search always returns whichInterval16 < len(rc.iv).
  745. //
  746. // The search space is from startIndex to endxIndex. If endxIndex is set to zero, then there
  747. // no upper bound.
  748. //
  749. func (rc *runContainer16) searchRange(key int, startIndex int, endxIndex int) (whichInterval16 int, alreadyPresent bool, numCompares int) {
  750. n := int(len(rc.iv))
  751. if n == 0 {
  752. return -1, false, 0
  753. }
  754. if endxIndex == 0 {
  755. endxIndex = n
  756. }
  757. // sort.Search returns the smallest index i
  758. // in [0, n) at which f(i) is true, assuming that on the range [0, n),
  759. // f(i) == true implies f(i+1) == true.
  760. // If there is no such index, Search returns n.
  761. // For correctness, this began as verbatim snippet from
  762. // sort.Search in the Go standard lib.
  763. // We inline our comparison function for speed, and
  764. // annotate with numCompares
  765. // to observe and test that extra bounds are utilized.
  766. i, j := startIndex, endxIndex
  767. for i < j {
  768. h := i + (j-i)/2 // avoid overflow when computing h as the bisector
  769. // i <= h < j
  770. numCompares++
  771. if !(key < int(rc.iv[h].start)) {
  772. i = h + 1
  773. } else {
  774. j = h
  775. }
  776. }
  777. below := i
  778. // end std lib snippet.
  779. // The above is a simple in-lining and annotation of:
  780. /* below := sort.Search(n,
  781. func(i int) bool {
  782. return key < rc.iv[i].start
  783. })
  784. */
  785. whichInterval16 = below - 1
  786. if below == n {
  787. // all falses => key is >= start of all interval16s
  788. // ... so does it belong to the last interval16?
  789. if key < int(rc.iv[n-1].last())+1 {
  790. // yes, it belongs to the last interval16
  791. alreadyPresent = true
  792. return
  793. }
  794. // no, it is beyond the last interval16.
  795. // leave alreadyPreset = false
  796. return
  797. }
  798. // INVAR: key is below rc.iv[below]
  799. if below == 0 {
  800. // key is before the first first interval16.
  801. // leave alreadyPresent = false
  802. return
  803. }
  804. // INVAR: key is >= rc.iv[below-1].start and
  805. // key is < rc.iv[below].start
  806. // is key in below-1 interval16?
  807. if key >= int(rc.iv[below-1].start) && key < int(rc.iv[below-1].last())+1 {
  808. // yes, it is. key is in below-1 interval16.
  809. alreadyPresent = true
  810. return
  811. }
  812. // INVAR: key >= rc.iv[below-1].endx && key < rc.iv[below].start
  813. // leave alreadyPresent = false
  814. return
  815. }
  816. // search returns alreadyPresent to indicate if the
  817. // key is already in one of our interval16s.
  818. //
  819. // If key is alreadyPresent, then whichInterval16 tells
  820. // you where.
  821. //
  822. // If key is not already present, then whichInterval16 is
  823. // set as follows:
  824. //
  825. // a) whichInterval16 == len(rc.iv)-1 if key is beyond our
  826. // last interval16 in rc.iv;
  827. //
  828. // b) whichInterval16 == -1 if key is before our first
  829. // interval16 in rc.iv;
  830. //
  831. // c) whichInterval16 is set to the minimum index of rc.iv
  832. // which comes strictly before the key;
  833. // so rc.iv[whichInterval16].last < key,
  834. // and if whichInterval16+1 exists, then key < rc.iv[whichInterval16+1].start
  835. // (Note that whichInterval16+1 won't exist when
  836. // whichInterval16 is the last interval.)
  837. //
  838. // runContainer16.search always returns whichInterval16 < len(rc.iv).
  839. //
  840. func (rc *runContainer16) search(key int) (whichInterval16 int, alreadyPresent bool, numCompares int) {
  841. return rc.searchRange(key, 0, 0)
  842. }
  843. // getCardinality returns the count of the integers stored in the
  844. // runContainer16. The running complexity depends on the size
  845. // of the container.
  846. func (rc *runContainer16) getCardinality() int {
  847. // have to compute it
  848. n := 0
  849. for _, p := range rc.iv {
  850. n += p.runlen()
  851. }
  852. return n
  853. }
  854. // isEmpty returns true if the container is empty.
  855. // It runs in constant time.
  856. func (rc *runContainer16) isEmpty() bool {
  857. return len(rc.iv) == 0
  858. }
  859. // AsSlice decompresses the contents into a []uint16 slice.
  860. func (rc *runContainer16) AsSlice() []uint16 {
  861. s := make([]uint16, rc.getCardinality())
  862. j := 0
  863. for _, p := range rc.iv {
  864. for i := p.start; i <= p.last(); i++ {
  865. s[j] = i
  866. j++
  867. }
  868. }
  869. return s
  870. }
  871. // newRunContainer16 creates an empty run container.
  872. func newRunContainer16() *runContainer16 {
  873. return &runContainer16{}
  874. }
  875. // newRunContainer16CopyIv creates a run container, initializing
  876. // with a copy of the supplied iv slice.
  877. //
  878. func newRunContainer16CopyIv(iv []interval16) *runContainer16 {
  879. rc := &runContainer16{
  880. iv: make([]interval16, len(iv)),
  881. }
  882. copy(rc.iv, iv)
  883. return rc
  884. }
  885. func (rc *runContainer16) Clone() *runContainer16 {
  886. rc2 := newRunContainer16CopyIv(rc.iv)
  887. return rc2
  888. }
  889. // newRunContainer16TakeOwnership returns a new runContainer16
  890. // backed by the provided iv slice, which we will
  891. // assume exclusive control over from now on.
  892. //
  893. func newRunContainer16TakeOwnership(iv []interval16) *runContainer16 {
  894. rc := &runContainer16{
  895. iv: iv,
  896. }
  897. return rc
  898. }
  899. const baseRc16Size = int(unsafe.Sizeof(runContainer16{}))
  900. const perIntervalRc16Size = int(unsafe.Sizeof(interval16{}))
  901. const baseDiskRc16Size = int(unsafe.Sizeof(uint16(0)))
  902. // see also runContainer16SerializedSizeInBytes(numRuns int) int
  903. // getSizeInBytes returns the number of bytes of memory
  904. // required by this runContainer16.
  905. func (rc *runContainer16) getSizeInBytes() int {
  906. return perIntervalRc16Size*len(rc.iv) + baseRc16Size
  907. }
  908. // runContainer16SerializedSizeInBytes returns the number of bytes of disk
  909. // required to hold numRuns in a runContainer16.
  910. func runContainer16SerializedSizeInBytes(numRuns int) int {
  911. return perIntervalRc16Size*numRuns + baseDiskRc16Size
  912. }
  913. // Add adds a single value k to the set.
  914. func (rc *runContainer16) Add(k uint16) (wasNew bool) {
  915. // TODO comment from runContainer16.java:
  916. // it might be better and simpler to do return
  917. // toBitmapOrArrayContainer(getCardinality()).add(k)
  918. // but note that some unit tests use this method to build up test
  919. // runcontainers without calling runOptimize
  920. k64 := int(k)
  921. index, present, _ := rc.search(k64)
  922. if present {
  923. return // already there
  924. }
  925. wasNew = true
  926. n := int(len(rc.iv))
  927. if index == -1 {
  928. // we may need to extend the first run
  929. if n > 0 {
  930. if rc.iv[0].start == k+1 {
  931. rc.iv[0].start = k
  932. rc.iv[0].length++
  933. return
  934. }
  935. }
  936. // nope, k stands alone, starting the new first interval16.
  937. rc.iv = append([]interval16{newInterval16Range(k, k)}, rc.iv...)
  938. return
  939. }
  940. // are we off the end? handle both index == n and index == n-1:
  941. if index >= n-1 {
  942. if int(rc.iv[n-1].last())+1 == k64 {
  943. rc.iv[n-1].length++
  944. return
  945. }
  946. rc.iv = append(rc.iv, newInterval16Range(k, k))
  947. return
  948. }
  949. // INVAR: index and index+1 both exist, and k goes between them.
  950. //
  951. // Now: add k into the middle,
  952. // possibly fusing with index or index+1 interval16
  953. // and possibly resulting in fusing of two interval16s
  954. // that had a one integer gap.
  955. left := index
  956. right := index + 1
  957. // are we fusing left and right by adding k?
  958. if int(rc.iv[left].last())+1 == k64 && int(rc.iv[right].start) == k64+1 {
  959. // fuse into left
  960. rc.iv[left].length = rc.iv[right].last() - rc.iv[left].start
  961. // remove redundant right
  962. rc.iv = append(rc.iv[:left+1], rc.iv[right+1:]...)
  963. return
  964. }
  965. // are we an addition to left?
  966. if int(rc.iv[left].last())+1 == k64 {
  967. // yes
  968. rc.iv[left].length++
  969. return
  970. }
  971. // are we an addition to right?
  972. if int(rc.iv[right].start) == k64+1 {
  973. // yes
  974. rc.iv[right].start = k
  975. rc.iv[right].length++
  976. return
  977. }
  978. // k makes a standalone new interval16, inserted in the middle
  979. tail := append([]interval16{newInterval16Range(k, k)}, rc.iv[right:]...)
  980. rc.iv = append(rc.iv[:left+1], tail...)
  981. return
  982. }
  983. // runIterator16 advice: you must call hasNext()
  984. // before calling next()/peekNext() to insure there are contents.
  985. type runIterator16 struct {
  986. rc *runContainer16
  987. curIndex int
  988. curPosInIndex uint16
  989. }
  990. // newRunIterator16 returns a new empty run container.
  991. func (rc *runContainer16) newRunIterator16() *runIterator16 {
  992. return &runIterator16{rc: rc, curIndex: 0, curPosInIndex: 0}
  993. }
  994. func (rc *runContainer16) iterate(cb func(x uint16) bool) bool {
  995. iterator := runIterator16{rc, 0, 0}
  996. for iterator.hasNext() {
  997. if !cb(iterator.next()) {
  998. return false
  999. }
  1000. }
  1001. return true
  1002. }
  1003. // hasNext returns false if calling next will panic. It
  1004. // returns true when there is at least one more value
  1005. // available in the iteration sequence.
  1006. func (ri *runIterator16) hasNext() bool {
  1007. return int(len(ri.rc.iv)) > ri.curIndex+1 ||
  1008. (int(len(ri.rc.iv)) == ri.curIndex+1 && ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex)
  1009. }
  1010. // next returns the next value in the iteration sequence.
  1011. func (ri *runIterator16) next() uint16 {
  1012. next := ri.rc.iv[ri.curIndex].start + ri.curPosInIndex
  1013. if ri.curPosInIndex == ri.rc.iv[ri.curIndex].length {
  1014. ri.curPosInIndex = 0
  1015. ri.curIndex++
  1016. } else {
  1017. ri.curPosInIndex++
  1018. }
  1019. return next
  1020. }
  1021. // peekNext returns the next value in the iteration sequence without advancing the iterator
  1022. func (ri *runIterator16) peekNext() uint16 {
  1023. return ri.rc.iv[ri.curIndex].start + ri.curPosInIndex
  1024. }
  1025. // advanceIfNeeded advances as long as the next value is smaller than minval
  1026. func (ri *runIterator16) advanceIfNeeded(minval uint16) {
  1027. if !ri.hasNext() || ri.peekNext() >= minval {
  1028. return
  1029. }
  1030. // interval cannot be -1 because of minval > peekNext
  1031. interval, isPresent, _ := ri.rc.searchRange(int(minval), ri.curIndex, int(len(ri.rc.iv)))
  1032. // if the minval is present, set the curPosIndex at the right position
  1033. if isPresent {
  1034. ri.curIndex = interval
  1035. ri.curPosInIndex = minval - ri.rc.iv[ri.curIndex].start
  1036. } else {
  1037. // otherwise interval is set to to the minimum index of rc.iv
  1038. // which comes strictly before the key, that's why we set the next interval
  1039. ri.curIndex = interval + 1
  1040. ri.curPosInIndex = 0
  1041. }
  1042. }
  1043. // runReverseIterator16 advice: you must call hasNext()
  1044. // before calling next() to insure there are contents.
  1045. type runReverseIterator16 struct {
  1046. rc *runContainer16
  1047. curIndex int // index into rc.iv
  1048. curPosInIndex uint16 // offset in rc.iv[curIndex]
  1049. }
  1050. // newRunReverseIterator16 returns a new empty run iterator.
  1051. func (rc *runContainer16) newRunReverseIterator16() *runReverseIterator16 {
  1052. index := int(len(rc.iv)) - 1
  1053. pos := uint16(0)
  1054. if index >= 0 {
  1055. pos = rc.iv[index].length
  1056. }
  1057. return &runReverseIterator16{
  1058. rc: rc,
  1059. curIndex: index,
  1060. curPosInIndex: pos,
  1061. }
  1062. }
  1063. // hasNext returns false if calling next will panic. It
  1064. // returns true when there is at least one more value
  1065. // available in the iteration sequence.
  1066. func (ri *runReverseIterator16) hasNext() bool {
  1067. return ri.curIndex > 0 || ri.curIndex == 0 && ri.curPosInIndex >= 0
  1068. }
  1069. // next returns the next value in the iteration sequence.
  1070. func (ri *runReverseIterator16) next() uint16 {
  1071. next := ri.rc.iv[ri.curIndex].start + ri.curPosInIndex
  1072. if ri.curPosInIndex > 0 {
  1073. ri.curPosInIndex--
  1074. } else {
  1075. ri.curIndex--
  1076. if ri.curIndex >= 0 {
  1077. ri.curPosInIndex = ri.rc.iv[ri.curIndex].length
  1078. }
  1079. }
  1080. return next
  1081. }
  1082. func (rc *runContainer16) newManyRunIterator16() *runIterator16 {
  1083. return rc.newRunIterator16()
  1084. }
  1085. // hs are the high bits to include to avoid needing to reiterate over the buffer in NextMany
  1086. func (ri *runIterator16) nextMany(hs uint32, buf []uint32) int {
  1087. n := 0
  1088. if !ri.hasNext() {
  1089. return n
  1090. }
  1091. // start and end are inclusive
  1092. for n < len(buf) {
  1093. moreVals := 0
  1094. if ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex {
  1095. // add as many as you can from this seq
  1096. moreVals = minOfInt(int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex)+1, len(buf)-n)
  1097. base := uint32(ri.rc.iv[ri.curIndex].start+ri.curPosInIndex) | hs
  1098. // allows BCE
  1099. buf2 := buf[n : n+moreVals]
  1100. for i := range buf2 {
  1101. buf2[i] = base + uint32(i)
  1102. }
  1103. // update values
  1104. n += moreVals
  1105. }
  1106. if moreVals+int(ri.curPosInIndex) > int(ri.rc.iv[ri.curIndex].length) {
  1107. ri.curPosInIndex = 0
  1108. ri.curIndex++
  1109. if ri.curIndex == int(len(ri.rc.iv)) {
  1110. break
  1111. }
  1112. } else {
  1113. ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16
  1114. }
  1115. }
  1116. return n
  1117. }
  1118. func (ri *runIterator16) nextMany64(hs uint64, buf []uint64) int {
  1119. n := 0
  1120. if !ri.hasNext() {
  1121. return n
  1122. }
  1123. // start and end are inclusive
  1124. for n < len(buf) {
  1125. moreVals := 0
  1126. if ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex {
  1127. // add as many as you can from this seq
  1128. moreVals = minOfInt(int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex)+1, len(buf)-n)
  1129. base := uint64(ri.rc.iv[ri.curIndex].start+ri.curPosInIndex) | hs
  1130. // allows BCE
  1131. buf2 := buf[n : n+moreVals]
  1132. for i := range buf2 {
  1133. buf2[i] = base + uint64(i)
  1134. }
  1135. // update values
  1136. n += moreVals
  1137. }
  1138. if moreVals+int(ri.curPosInIndex) > int(ri.rc.iv[ri.curIndex].length) {
  1139. ri.curPosInIndex = 0
  1140. ri.curIndex++
  1141. if ri.curIndex == int(len(ri.rc.iv)) {
  1142. break
  1143. }
  1144. } else {
  1145. ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16
  1146. }
  1147. }
  1148. return n
  1149. }
  1150. // remove removes key from the container.
  1151. func (rc *runContainer16) removeKey(key uint16) (wasPresent bool) {
  1152. var index int
  1153. index, wasPresent, _ = rc.search(int(key))
  1154. if !wasPresent {
  1155. return // already removed, nothing to do.
  1156. }
  1157. pos := key - rc.iv[index].start
  1158. rc.deleteAt(&index, &pos)
  1159. return
  1160. }
  1161. // internal helper functions
  1162. func (rc *runContainer16) deleteAt(curIndex *int, curPosInIndex *uint16) {
  1163. ci := *curIndex
  1164. pos := *curPosInIndex
  1165. // are we first, last, or in the middle of our interval16?
  1166. switch {
  1167. case pos == 0:
  1168. if int(rc.iv[ci].length) == 0 {
  1169. // our interval disappears
  1170. rc.iv = append(rc.iv[:ci], rc.iv[ci+1:]...)
  1171. // curIndex stays the same, since the delete did
  1172. // the advance for us.
  1173. *curPosInIndex = 0
  1174. } else {
  1175. rc.iv[ci].start++ // no longer overflowable
  1176. rc.iv[ci].length--
  1177. }
  1178. case pos == rc.iv[ci].length:
  1179. // length
  1180. rc.iv[ci].length--
  1181. // our interval16 cannot disappear, else we would have been pos == 0, case first above.
  1182. *curPosInIndex--
  1183. // if we leave *curIndex alone, then Next() will work properly even after the delete.
  1184. default:
  1185. //middle
  1186. // split into two, adding an interval16
  1187. new0 := newInterval16Range(rc.iv[ci].start, rc.iv[ci].start+*curPosInIndex-1)
  1188. new1start := int(rc.iv[ci].start+*curPosInIndex) + 1
  1189. if new1start > int(MaxUint16) {
  1190. panic("overflow?!?!")
  1191. }
  1192. new1 := newInterval16Range(uint16(new1start), rc.iv[ci].last())
  1193. tail := append([]interval16{new0, new1}, rc.iv[ci+1:]...)
  1194. rc.iv = append(rc.iv[:ci], tail...)
  1195. // update curIndex and curPosInIndex
  1196. *curIndex++
  1197. *curPosInIndex = 0
  1198. }
  1199. }
  1200. func have4Overlap16(astart, alast, bstart, blast int) bool {
  1201. if alast+1 <= bstart {
  1202. return false
  1203. }
  1204. return blast+1 > astart
  1205. }
  1206. func intersectWithLeftover16(astart, alast, bstart, blast int) (isOverlap, isLeftoverA, isLeftoverB bool, leftoverstart int, intersection interval16) {
  1207. if !have4Overlap16(astart, alast, bstart, blast) {
  1208. return
  1209. }
  1210. isOverlap = true
  1211. // do the intersection:
  1212. if bstart > astart {
  1213. intersection.start = uint16(bstart)
  1214. } else {
  1215. intersection.start = uint16(astart)
  1216. }
  1217. switch {
  1218. case blast < alast:
  1219. isLeftoverA = true
  1220. leftoverstart = blast + 1
  1221. intersection.length = uint16(blast) - intersection.start
  1222. case alast < blast:
  1223. isLeftoverB = true
  1224. leftoverstart = alast + 1
  1225. intersection.length = uint16(alast) - intersection.start
  1226. default:
  1227. // alast == blast
  1228. intersection.length = uint16(alast) - intersection.start
  1229. }
  1230. return
  1231. }
  1232. func (rc *runContainer16) findNextIntervalThatIntersectsStartingFrom(startIndex int, key int) (index int, done bool) {
  1233. w, _, _ := rc.searchRange(key, startIndex, 0)
  1234. // rc.search always returns w < len(rc.iv)
  1235. if w < startIndex {
  1236. // not found and comes before lower bound startIndex,
  1237. // so just use the lower bound.
  1238. if startIndex == int(len(rc.iv)) {
  1239. // also this bump up means that we are done
  1240. return startIndex, true
  1241. }
  1242. return startIndex, false
  1243. }
  1244. return w, false
  1245. }
  1246. func sliceToString16(m []interval16) string {
  1247. s := ""
  1248. for i := range m {
  1249. s += fmt.Sprintf("%v: %s, ", i, m[i])
  1250. }
  1251. return s
  1252. }
  1253. // helper for invert
  1254. func (rc *runContainer16) invertlastInterval(origin uint16, lastIdx int) []interval16 {
  1255. cur := rc.iv[lastIdx]
  1256. if cur.last() == MaxUint16 {
  1257. if cur.start == origin {
  1258. return nil // empty container
  1259. }
  1260. return []interval16{newInterval16Range(origin, cur.start-1)}
  1261. }
  1262. if cur.start == origin {
  1263. return []interval16{newInterval16Range(cur.last()+1, MaxUint16)}
  1264. }
  1265. // invert splits
  1266. return []interval16{
  1267. newInterval16Range(origin, cur.start-1),
  1268. newInterval16Range(cur.last()+1, MaxUint16),
  1269. }
  1270. }
  1271. // invert returns a new container (not inplace), that is
  1272. // the inversion of rc. For each bit b in rc, the
  1273. // returned value has !b
  1274. func (rc *runContainer16) invert() *runContainer16 {
  1275. ni := len(rc.iv)
  1276. var m []interval16
  1277. switch ni {
  1278. case 0:
  1279. return &runContainer16{iv: []interval16{newInterval16Range(0, MaxUint16)}}
  1280. case 1:
  1281. return &runContainer16{iv: rc.invertlastInterval(0, 0)}
  1282. }
  1283. var invstart int
  1284. ult := ni - 1
  1285. for i, cur := range rc.iv {
  1286. if i == ult {
  1287. // invertlastInteval will add both intervals (b) and (c) in
  1288. // diagram below.
  1289. m = append(m, rc.invertlastInterval(uint16(invstart), i)...)
  1290. break
  1291. }
  1292. // INVAR: i and cur are not the last interval, there is a next at i+1
  1293. //
  1294. // ........[cur.start, cur.last] ...... [next.start, next.last]....
  1295. // ^ ^ ^
  1296. // (a) (b) (c)
  1297. //
  1298. // Now: we add interval (a); but if (a) is empty, for cur.start==0, we skip it.
  1299. if cur.start > 0 {
  1300. m = append(m, newInterval16Range(uint16(invstart), cur.start-1))
  1301. }
  1302. invstart = int(cur.last() + 1)
  1303. }
  1304. return &runContainer16{iv: m}
  1305. }
  1306. func (iv interval16) equal(b interval16) bool {
  1307. return iv.start == b.start && iv.length == b.length
  1308. }
  1309. func (iv interval16) isSuperSetOf(b interval16) bool {
  1310. return iv.start <= b.start && b.last() <= iv.last()
  1311. }
  1312. func (iv interval16) subtractInterval(del interval16) (left []interval16, delcount int) {
  1313. isect, isEmpty := intersectInterval16s(iv, del)
  1314. if isEmpty {
  1315. return nil, 0
  1316. }
  1317. if del.isSuperSetOf(iv) {
  1318. return nil, iv.runlen()
  1319. }
  1320. switch {
  1321. case isect.start > iv.start && isect.last() < iv.last():
  1322. new0 := newInterval16Range(iv.start, isect.start-1)
  1323. new1 := newInterval16Range(isect.last()+1, iv.last())
  1324. return []interval16{new0, new1}, isect.runlen()
  1325. case isect.start == iv.start:
  1326. return []interval16{newInterval16Range(isect.last()+1, iv.last())}, isect.runlen()
  1327. default:
  1328. return []interval16{newInterval16Range(iv.start, isect.start-1)}, isect.runlen()
  1329. }
  1330. }
  1331. func (rc *runContainer16) isubtract(del interval16) {
  1332. origiv := make([]interval16, len(rc.iv))
  1333. copy(origiv, rc.iv)
  1334. n := int(len(rc.iv))
  1335. if n == 0 {
  1336. return // already done.
  1337. }
  1338. _, isEmpty := intersectInterval16s(newInterval16Range(rc.iv[0].start, rc.iv[n-1].last()), del)
  1339. if isEmpty {
  1340. return // done
  1341. }
  1342. // INVAR there is some intersection between rc and del
  1343. istart, startAlready, _ := rc.search(int(del.start))
  1344. ilast, lastAlready, _ := rc.search(int(del.last()))
  1345. if istart == -1 {
  1346. if ilast == n-1 && !lastAlready {
  1347. rc.iv = nil
  1348. return
  1349. }
  1350. }
  1351. // some intervals will remain
  1352. switch {
  1353. case startAlready && lastAlready:
  1354. res0, _ := rc.iv[istart].subtractInterval(del)
  1355. // would overwrite values in iv b/c res0 can have len 2. so
  1356. // write to origiv instead.
  1357. lost := 1 + ilast - istart
  1358. changeSize := int(len(res0)) - lost
  1359. newSize := int(len(rc.iv)) + changeSize
  1360. // rc.iv = append(pre, caboose...)
  1361. // return
  1362. if ilast != istart {
  1363. res1, _ := rc.iv[ilast].subtractInterval(del)
  1364. res0 = append(res0, res1...)
  1365. changeSize = int(len(res0)) - lost
  1366. newSize = int(len(rc.iv)) + changeSize
  1367. }
  1368. switch {
  1369. case changeSize < 0:
  1370. // shrink
  1371. copy(rc.iv[istart+int(len(res0)):], rc.iv[ilast+1:])
  1372. copy(rc.iv[istart:istart+int(len(res0))], res0)
  1373. rc.iv = rc.iv[:newSize]
  1374. return
  1375. case changeSize == 0:
  1376. // stay the same
  1377. copy(rc.iv[istart:istart+int(len(res0))], res0)
  1378. return
  1379. default:
  1380. // changeSize > 0 is only possible when ilast == istart.
  1381. // Hence we now know: changeSize == 1 and len(res0) == 2
  1382. rc.iv = append(rc.iv, interval16{})
  1383. // len(rc.iv) is correct now, no need to rc.iv = rc.iv[:newSize]
  1384. // copy the tail into place
  1385. copy(rc.iv[ilast+2:], rc.iv[ilast+1:])
  1386. // copy the new item(s) into place
  1387. copy(rc.iv[istart:istart+2], res0)
  1388. return
  1389. }
  1390. case !startAlready && !lastAlready:
  1391. // we get to discard whole intervals
  1392. // from the search() definition:
  1393. // if del.start is not present, then istart is
  1394. // set as follows:
  1395. //
  1396. // a) istart == n-1 if del.start is beyond our
  1397. // last interval16 in rc.iv;
  1398. //
  1399. // b) istart == -1 if del.start is before our first
  1400. // interval16 in rc.iv;
  1401. //
  1402. // c) istart is set to the minimum index of rc.iv
  1403. // which comes strictly before the del.start;
  1404. // so del.start > rc.iv[istart].last,
  1405. // and if istart+1 exists, then del.start < rc.iv[istart+1].startx
  1406. // if del.last is not present, then ilast is
  1407. // set as follows:
  1408. //
  1409. // a) ilast == n-1 if del.last is beyond our
  1410. // last interval16 in rc.iv;
  1411. //
  1412. // b) ilast == -1 if del.last is before our first
  1413. // interval16 in rc.iv;
  1414. //
  1415. // c) ilast is set to the minimum index of rc.iv
  1416. // which comes strictly before the del.last;
  1417. // so del.last > rc.iv[ilast].last,
  1418. // and if ilast+1 exists, then del.last < rc.iv[ilast+1].start
  1419. // INVAR: istart >= 0
  1420. pre := rc.iv[:istart+1]
  1421. if ilast == n-1 {
  1422. rc.iv = pre
  1423. return
  1424. }
  1425. // INVAR: ilast < n-1
  1426. lost := ilast - istart
  1427. changeSize := -lost
  1428. newSize := int(len(rc.iv)) + changeSize
  1429. if changeSize != 0 {
  1430. copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:])
  1431. }
  1432. rc.iv = rc.iv[:newSize]
  1433. return
  1434. case startAlready && !lastAlready:
  1435. // we can only shrink or stay the same size
  1436. // i.e. we either eliminate the whole interval,
  1437. // or just cut off the right side.
  1438. res0, _ := rc.iv[istart].subtractInterval(del)
  1439. if len(res0) > 0 {
  1440. // len(res) must be 1
  1441. rc.iv[istart] = res0[0]
  1442. }
  1443. lost := 1 + (ilast - istart)
  1444. changeSize := int(len(res0)) - lost
  1445. newSize := int(len(rc.iv)) + changeSize
  1446. if changeSize != 0 {
  1447. copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:])
  1448. }
  1449. rc.iv = rc.iv[:newSize]
  1450. return
  1451. case !startAlready && lastAlready:
  1452. // we can only shrink or stay the same size
  1453. res1, _ := rc.iv[ilast].subtractInterval(del)
  1454. lost := ilast - istart
  1455. changeSize := int(len(res1)) - lost
  1456. newSize := int(len(rc.iv)) + changeSize
  1457. if changeSize != 0 {
  1458. // move the tail first to make room for res1
  1459. copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:])
  1460. }
  1461. copy(rc.iv[istart+1:], res1)
  1462. rc.iv = rc.iv[:newSize]
  1463. return
  1464. }
  1465. }
  1466. // compute rc minus b, and return the result as a new value (not inplace).
  1467. // port of run_container_andnot from CRoaring...
  1468. // https://github.com/RoaringBitmap/CRoaring/blob/master/src/containers/run.c#L435-L496
  1469. func (rc *runContainer16) AndNotRunContainer16(b *runContainer16) *runContainer16 {
  1470. if len(b.iv) == 0 || len(rc.iv) == 0 {
  1471. return rc
  1472. }
  1473. dst := newRunContainer16()
  1474. apos := 0
  1475. bpos := 0
  1476. a := rc
  1477. astart := a.iv[apos].start
  1478. alast := a.iv[apos].last()
  1479. bstart := b.iv[bpos].start
  1480. blast := b.iv[bpos].last()
  1481. alen := len(a.iv)
  1482. blen := len(b.iv)
  1483. for apos < alen && bpos < blen {
  1484. switch {
  1485. case alast < bstart:
  1486. // output the first run
  1487. dst.iv = append(dst.iv, newInterval16Range(astart, alast))
  1488. apos++
  1489. if apos < alen {
  1490. astart = a.iv[apos].start
  1491. alast = a.iv[apos].last()
  1492. }
  1493. case blast < astart:
  1494. // exit the second run
  1495. bpos++
  1496. if bpos < blen {
  1497. bstart = b.iv[bpos].start
  1498. blast = b.iv[bpos].last()
  1499. }
  1500. default:
  1501. // a: [ ]
  1502. // b: [ ]
  1503. // alast >= bstart
  1504. // blast >= astart
  1505. if astart < bstart {
  1506. dst.iv = append(dst.iv, newInterval16Range(astart, bstart-1))
  1507. }
  1508. if alast > blast {
  1509. astart = blast + 1
  1510. } else {
  1511. apos++
  1512. if apos < alen {
  1513. astart = a.iv[apos].start
  1514. alast = a.iv[apos].last()
  1515. }
  1516. }
  1517. }
  1518. }
  1519. if apos < alen {
  1520. dst.iv = append(dst.iv, newInterval16Range(astart, alast))
  1521. apos++
  1522. if apos < alen {
  1523. dst.iv = append(dst.iv, a.iv[apos:]...)
  1524. }
  1525. }
  1526. return dst
  1527. }
  1528. func (rc *runContainer16) numberOfRuns() (nr int) {
  1529. return len(rc.iv)
  1530. }
  1531. func (rc *runContainer16) containerType() contype {
  1532. return run16Contype
  1533. }
  1534. func (rc *runContainer16) equals16(srb *runContainer16) bool {
  1535. // Check if the containers are the same object.
  1536. if rc == srb {
  1537. return true
  1538. }
  1539. if len(srb.iv) != len(rc.iv) {
  1540. return false
  1541. }
  1542. for i, v := range rc.iv {
  1543. if v != srb.iv[i] {
  1544. return false
  1545. }
  1546. }
  1547. return true
  1548. }
  1549. // compile time verify we meet interface requirements
  1550. var _ container = &runContainer16{}
  1551. func (rc *runContainer16) clone() container {
  1552. return newRunContainer16CopyIv(rc.iv)
  1553. }
  1554. func (rc *runContainer16) minimum() uint16 {
  1555. return rc.iv[0].start // assume not empty
  1556. }
  1557. func (rc *runContainer16) maximum() uint16 {
  1558. return rc.iv[len(rc.iv)-1].last() // assume not empty
  1559. }
  1560. func (rc *runContainer16) isFull() bool {
  1561. return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16))
  1562. }
  1563. func (rc *runContainer16) and(a container) container {
  1564. if rc.isFull() {
  1565. return a.clone()
  1566. }
  1567. switch c := a.(type) {
  1568. case *runContainer16:
  1569. return rc.intersect(c)
  1570. case *arrayContainer:
  1571. return rc.andArray(c)
  1572. case *bitmapContainer:
  1573. return rc.andBitmapContainer(c)
  1574. }
  1575. panic("unsupported container type")
  1576. }
  1577. func (rc *runContainer16) andCardinality(a container) int {
  1578. switch c := a.(type) {
  1579. case *runContainer16:
  1580. return int(rc.intersectCardinality(c))
  1581. case *arrayContainer:
  1582. return rc.andArrayCardinality(c)
  1583. case *bitmapContainer:
  1584. return rc.andBitmapContainerCardinality(c)
  1585. }
  1586. panic("unsupported container type")
  1587. }
  1588. // andBitmapContainer finds the intersection of rc and b.
  1589. func (rc *runContainer16) andBitmapContainer(bc *bitmapContainer) container {
  1590. bc2 := newBitmapContainerFromRun(rc)
  1591. return bc2.andBitmap(bc)
  1592. }
  1593. func (rc *runContainer16) andArrayCardinality(ac *arrayContainer) int {
  1594. pos := 0
  1595. answer := 0
  1596. maxpos := ac.getCardinality()
  1597. if maxpos == 0 {
  1598. return 0 // won't happen in actual code
  1599. }
  1600. v := ac.content[pos]
  1601. mainloop:
  1602. for _, p := range rc.iv {
  1603. for v < p.start {
  1604. pos++
  1605. if pos == maxpos {
  1606. break mainloop
  1607. }
  1608. v = ac.content[pos]
  1609. }
  1610. for v <= p.last() {
  1611. answer++
  1612. pos++
  1613. if pos == maxpos {
  1614. break mainloop
  1615. }
  1616. v = ac.content[pos]
  1617. }
  1618. }
  1619. return answer
  1620. }
  1621. func (rc *runContainer16) iand(a container) container {
  1622. if rc.isFull() {
  1623. return a.clone()
  1624. }
  1625. switch c := a.(type) {
  1626. case *runContainer16:
  1627. return rc.inplaceIntersect(c)
  1628. case *arrayContainer:
  1629. return rc.andArray(c)
  1630. case *bitmapContainer:
  1631. return rc.iandBitmapContainer(c)
  1632. }
  1633. panic("unsupported container type")
  1634. }
  1635. func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container {
  1636. sect := rc.intersect(rc2)
  1637. *rc = *sect
  1638. return rc
  1639. }
  1640. func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container {
  1641. isect := rc.andBitmapContainer(bc)
  1642. *rc = *newRunContainer16FromContainer(isect)
  1643. return rc
  1644. }
  1645. func (rc *runContainer16) andArray(ac *arrayContainer) container {
  1646. if len(rc.iv) == 0 {
  1647. return newArrayContainer()
  1648. }
  1649. acCardinality := ac.getCardinality()
  1650. c := newArrayContainerCapacity(acCardinality)
  1651. for rlePos, arrayPos := 0, 0; arrayPos < acCardinality; {
  1652. iv := rc.iv[rlePos]
  1653. arrayVal := ac.content[arrayPos]
  1654. for iv.last() < arrayVal {
  1655. rlePos++
  1656. if rlePos == len(rc.iv) {
  1657. return c
  1658. }
  1659. iv = rc.iv[rlePos]
  1660. }
  1661. if iv.start > arrayVal {
  1662. arrayPos = advanceUntil(ac.content, arrayPos, len(ac.content), iv.start)
  1663. } else {
  1664. c.content = append(c.content, arrayVal)
  1665. arrayPos++
  1666. }
  1667. }
  1668. return c
  1669. }
  1670. func (rc *runContainer16) andNot(a container) container {
  1671. switch c := a.(type) {
  1672. case *arrayContainer:
  1673. return rc.andNotArray(c)
  1674. case *bitmapContainer:
  1675. return rc.andNotBitmap(c)
  1676. case *runContainer16:
  1677. return rc.andNotRunContainer16(c)
  1678. }
  1679. panic("unsupported container type")
  1680. }
  1681. func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uint32) int {
  1682. k := i
  1683. var val int
  1684. for _, p := range rc.iv {
  1685. n := p.runlen()
  1686. for j := int(0); j < n; j++ {
  1687. val = int(p.start) + j
  1688. x[k] = uint32(val) | mask
  1689. k++
  1690. }
  1691. }
  1692. return k
  1693. }
  1694. func (rc *runContainer16) getShortIterator() shortPeekable {
  1695. return rc.newRunIterator16()
  1696. }
  1697. func (rc *runContainer16) getReverseIterator() shortIterable {
  1698. return rc.newRunReverseIterator16()
  1699. }
  1700. func (rc *runContainer16) getManyIterator() manyIterable {
  1701. return rc.newManyRunIterator16()
  1702. }
  1703. // add the values in the range [firstOfRange, endx). endx
  1704. // is still abe to express 2^16 because it is an int not an uint16.
  1705. func (rc *runContainer16) iaddRange(firstOfRange, endx int) container {
  1706. if firstOfRange > endx {
  1707. panic(fmt.Sprintf("invalid %v = endx > firstOfRange", endx))
  1708. }
  1709. if firstOfRange == endx {
  1710. return rc
  1711. }
  1712. addme := newRunContainer16TakeOwnership([]interval16{
  1713. {
  1714. start: uint16(firstOfRange),
  1715. length: uint16(endx - 1 - firstOfRange),
  1716. },
  1717. })
  1718. *rc = *rc.union(addme)
  1719. return rc
  1720. }
  1721. // remove the values in the range [firstOfRange,endx)
  1722. func (rc *runContainer16) iremoveRange(firstOfRange, endx int) container {
  1723. if firstOfRange > endx {
  1724. panic(fmt.Sprintf("request to iremove empty set [%v, %v),"+
  1725. " nothing to do.", firstOfRange, endx))
  1726. }
  1727. // empty removal
  1728. if firstOfRange == endx {
  1729. return rc
  1730. }
  1731. x := newInterval16Range(uint16(firstOfRange), uint16(endx-1))
  1732. rc.isubtract(x)
  1733. return rc
  1734. }
  1735. // not flip the values in the range [firstOfRange,endx)
  1736. func (rc *runContainer16) not(firstOfRange, endx int) container {
  1737. if firstOfRange > endx {
  1738. panic(fmt.Sprintf("invalid %v = endx > firstOfRange = %v", endx, firstOfRange))
  1739. }
  1740. return rc.Not(firstOfRange, endx)
  1741. }
  1742. // Not flips the values in the range [firstOfRange,endx).
  1743. // This is not inplace. Only the returned value has the flipped bits.
  1744. //
  1745. // Currently implemented as (!A intersect B) union (A minus B),
  1746. // where A is rc, and B is the supplied [firstOfRange, endx) interval.
  1747. //
  1748. // TODO(time optimization): convert this to a single pass
  1749. // algorithm by copying AndNotRunContainer16() and modifying it.
  1750. // Current routine is correct but
  1751. // makes 2 more passes through the arrays than should be
  1752. // strictly necessary. Measure both ways though--this may not matter.
  1753. //
  1754. func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 {
  1755. if firstOfRange > endx {
  1756. panic(fmt.Sprintf("invalid %v = endx > firstOfRange == %v", endx, firstOfRange))
  1757. }
  1758. if firstOfRange >= endx {
  1759. return rc.Clone()
  1760. }
  1761. a := rc
  1762. // algo:
  1763. // (!A intersect B) union (A minus B)
  1764. nota := a.invert()
  1765. bs := []interval16{newInterval16Range(uint16(firstOfRange), uint16(endx-1))}
  1766. b := newRunContainer16TakeOwnership(bs)
  1767. notAintersectB := nota.intersect(b)
  1768. aMinusB := a.AndNotRunContainer16(b)
  1769. rc2 := notAintersectB.union(aMinusB)
  1770. return rc2
  1771. }
  1772. // equals is now logical equals; it does not require the
  1773. // same underlying container type.
  1774. func (rc *runContainer16) equals(o container) bool {
  1775. srb, ok := o.(*runContainer16)
  1776. if !ok {
  1777. // maybe value instead of pointer
  1778. val, valok := o.(*runContainer16)
  1779. if valok {
  1780. srb = val
  1781. ok = true
  1782. }
  1783. }
  1784. if ok {
  1785. // Check if the containers are the same object.
  1786. if rc == srb {
  1787. return true
  1788. }
  1789. if len(srb.iv) != len(rc.iv) {
  1790. return false
  1791. }
  1792. for i, v := range rc.iv {
  1793. if v != srb.iv[i] {
  1794. return false
  1795. }
  1796. }
  1797. return true
  1798. }
  1799. // use generic comparison
  1800. if o.getCardinality() != rc.getCardinality() {
  1801. return false
  1802. }
  1803. rit := rc.getShortIterator()
  1804. bit := o.getShortIterator()
  1805. //k := 0
  1806. for rit.hasNext() {
  1807. if bit.next() != rit.next() {
  1808. return false
  1809. }
  1810. //k++
  1811. }
  1812. return true
  1813. }
  1814. func (rc *runContainer16) iaddReturnMinimized(x uint16) container {
  1815. rc.Add(x)
  1816. return rc
  1817. }
  1818. func (rc *runContainer16) iadd(x uint16) (wasNew bool) {
  1819. return rc.Add(x)
  1820. }
  1821. func (rc *runContainer16) iremoveReturnMinimized(x uint16) container {
  1822. rc.removeKey(x)
  1823. return rc
  1824. }
  1825. func (rc *runContainer16) iremove(x uint16) bool {
  1826. return rc.removeKey(x)
  1827. }
  1828. func (rc *runContainer16) or(a container) container {
  1829. if rc.isFull() {
  1830. return rc.clone()
  1831. }
  1832. switch c := a.(type) {
  1833. case *runContainer16:
  1834. return rc.union(c)
  1835. case *arrayContainer:
  1836. return rc.orArray(c)
  1837. case *bitmapContainer:
  1838. return rc.orBitmapContainer(c)
  1839. }
  1840. panic("unsupported container type")
  1841. }
  1842. func (rc *runContainer16) orCardinality(a container) int {
  1843. switch c := a.(type) {
  1844. case *runContainer16:
  1845. return int(rc.unionCardinality(c))
  1846. case *arrayContainer:
  1847. return rc.orArrayCardinality(c)
  1848. case *bitmapContainer:
  1849. return rc.orBitmapContainerCardinality(c)
  1850. }
  1851. panic("unsupported container type")
  1852. }
  1853. // orBitmapContainer finds the union of rc and bc.
  1854. func (rc *runContainer16) orBitmapContainer(bc *bitmapContainer) container {
  1855. bc2 := newBitmapContainerFromRun(rc)
  1856. return bc2.iorBitmap(bc)
  1857. }
  1858. func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int {
  1859. answer := 0
  1860. for i := range rc.iv {
  1861. answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1)
  1862. }
  1863. //bc.computeCardinality()
  1864. return answer
  1865. }
  1866. func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int {
  1867. return rc.getCardinality() + bc.getCardinality() - rc.andBitmapContainerCardinality(bc)
  1868. }
  1869. // orArray finds the union of rc and ac.
  1870. func (rc *runContainer16) orArray(ac *arrayContainer) container {
  1871. if ac.isEmpty() {
  1872. return rc.clone()
  1873. }
  1874. if rc.isEmpty() {
  1875. return ac.clone()
  1876. }
  1877. intervals, cardMinusOne := runArrayUnionToRuns(rc, ac)
  1878. result := newRunContainer16TakeOwnership(intervals)
  1879. if len(intervals) >= 2048 && cardMinusOne >= arrayDefaultMaxSize {
  1880. return newBitmapContainerFromRun(result)
  1881. }
  1882. if len(intervals)*2 > 1+int(cardMinusOne) {
  1883. return result.toArrayContainer()
  1884. }
  1885. return result
  1886. }
  1887. // orArray finds the union of rc and ac.
  1888. func (rc *runContainer16) orArrayCardinality(ac *arrayContainer) int {
  1889. return ac.getCardinality() + rc.getCardinality() - rc.andArrayCardinality(ac)
  1890. }
  1891. func (rc *runContainer16) ior(a container) container {
  1892. if rc.isFull() {
  1893. return rc
  1894. }
  1895. switch c := a.(type) {
  1896. case *runContainer16:
  1897. return rc.inplaceUnion(c)
  1898. case *arrayContainer:
  1899. return rc.iorArray(c)
  1900. case *bitmapContainer:
  1901. return rc.iorBitmapContainer(c)
  1902. }
  1903. panic("unsupported container type")
  1904. }
  1905. func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container {
  1906. for _, p := range rc2.iv {
  1907. last := int(p.last())
  1908. for i := int(p.start); i <= last; i++ {
  1909. rc.Add(uint16(i))
  1910. }
  1911. }
  1912. return rc
  1913. }
  1914. func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container {
  1915. it := bc.getShortIterator()
  1916. for it.hasNext() {
  1917. rc.Add(it.next())
  1918. }
  1919. return rc
  1920. }
  1921. func (rc *runContainer16) iorArray(ac *arrayContainer) container {
  1922. if rc.isEmpty() {
  1923. return ac.clone()
  1924. }
  1925. if ac.isEmpty() {
  1926. return rc
  1927. }
  1928. var cardMinusOne uint16
  1929. //TODO: perform the union algorithm in-place using rc.iv
  1930. // this can be done with methods like the in-place array container union
  1931. // but maybe lazily moving the remaining elements back.
  1932. rc.iv, cardMinusOne = runArrayUnionToRuns(rc, ac)
  1933. if len(rc.iv) >= 2048 && cardMinusOne >= arrayDefaultMaxSize {
  1934. return newBitmapContainerFromRun(rc)
  1935. }
  1936. if len(rc.iv)*2 > 1+int(cardMinusOne) {
  1937. return rc.toArrayContainer()
  1938. }
  1939. return rc
  1940. }
  1941. func runArrayUnionToRuns(rc *runContainer16, ac *arrayContainer) ([]interval16, uint16) {
  1942. pos1 := 0
  1943. pos2 := 0
  1944. length1 := len(ac.content)
  1945. length2 := len(rc.iv)
  1946. target := make([]interval16, 0, len(rc.iv))
  1947. // have to find the first range
  1948. // options are
  1949. // 1. from array container
  1950. // 2. from run container
  1951. var previousInterval interval16
  1952. var cardMinusOne uint16
  1953. if ac.content[0] < rc.iv[0].start {
  1954. previousInterval.start = ac.content[0]
  1955. previousInterval.length = 0
  1956. pos1++
  1957. } else {
  1958. previousInterval.start = rc.iv[0].start
  1959. previousInterval.length = rc.iv[0].length
  1960. pos2++
  1961. }
  1962. for pos1 < length1 || pos2 < length2 {
  1963. if pos1 < length1 {
  1964. s1 := ac.content[pos1]
  1965. if s1 <= previousInterval.start+previousInterval.length {
  1966. pos1++
  1967. continue
  1968. }
  1969. if previousInterval.last() < MaxUint16 && previousInterval.last()+1 == s1 {
  1970. previousInterval.length++
  1971. pos1++
  1972. continue
  1973. }
  1974. }
  1975. if pos2 < length2 {
  1976. range2 := rc.iv[pos2]
  1977. if range2.start <= previousInterval.last() || range2.start > 0 && range2.start-1 == previousInterval.last() {
  1978. pos2++
  1979. if previousInterval.last() < range2.last() {
  1980. previousInterval.length = range2.last() - previousInterval.start
  1981. }
  1982. continue
  1983. }
  1984. }
  1985. cardMinusOne += previousInterval.length + 1
  1986. target = append(target, previousInterval)
  1987. if pos2 == length2 || pos1 < length1 && ac.content[pos1] < rc.iv[pos2].start {
  1988. previousInterval.start = ac.content[pos1]
  1989. previousInterval.length = 0
  1990. pos1++
  1991. } else {
  1992. previousInterval = rc.iv[pos2]
  1993. pos2++
  1994. }
  1995. }
  1996. cardMinusOne += previousInterval.length
  1997. target = append(target, previousInterval)
  1998. return target, cardMinusOne
  1999. }
  2000. // lazyIOR is described (not yet implemented) in
  2001. // this nice note from @lemire on
  2002. // https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737
  2003. //
  2004. // Description of lazyOR and lazyIOR from @lemire:
  2005. //
  2006. // Lazy functions are optional and can be simply
  2007. // wrapper around non-lazy functions.
  2008. //
  2009. // The idea of "laziness" is as follows. It is
  2010. // inspired by the concept of lazy evaluation
  2011. // you might be familiar with (functional programming
  2012. // and all that). So a roaring bitmap is
  2013. // such that all its containers are, in some
  2014. // sense, chosen to use as little memory as
  2015. // possible. This is nice. Also, all bitsets
  2016. // are "cardinality aware" so that you can do
  2017. // fast rank/select queries, or query the
  2018. // cardinality of the whole bitmap... very fast,
  2019. // without latency.
  2020. //
  2021. // However, imagine that you are aggregating 100
  2022. // bitmaps together. So you OR the first two, then OR
  2023. // that with the third one and so forth. Clearly,
  2024. // intermediate bitmaps don't need to be as
  2025. // compressed as possible, right? They can be
  2026. // in a "dirty state". You only need the end
  2027. // result to be in a nice state... which you
  2028. // can achieve by calling repairAfterLazy at the end.
  2029. //
  2030. // The Java/C code does something special for
  2031. // the in-place lazy OR runs. The idea is that
  2032. // instead of taking two run containers and
  2033. // generating a new one, we actually try to
  2034. // do the computation in-place through a
  2035. // technique invented by @gssiyankai (pinging him!).
  2036. // What you do is you check whether the host
  2037. // run container has lots of extra capacity.
  2038. // If it does, you move its data at the end of
  2039. // the backing array, and then you write
  2040. // the answer at the beginning. What this
  2041. // trick does is minimize memory allocations.
  2042. //
  2043. func (rc *runContainer16) lazyIOR(a container) container {
  2044. // not lazy at the moment
  2045. return rc.ior(a)
  2046. }
  2047. // lazyOR is described above in lazyIOR.
  2048. func (rc *runContainer16) lazyOR(a container) container {
  2049. // not lazy at the moment
  2050. return rc.or(a)
  2051. }
  2052. func (rc *runContainer16) intersects(a container) bool {
  2053. // TODO: optimize by doing inplace/less allocation
  2054. isect := rc.and(a)
  2055. return !isect.isEmpty()
  2056. }
  2057. func (rc *runContainer16) xor(a container) container {
  2058. switch c := a.(type) {
  2059. case *arrayContainer:
  2060. return rc.xorArray(c)
  2061. case *bitmapContainer:
  2062. return rc.xorBitmap(c)
  2063. case *runContainer16:
  2064. return rc.xorRunContainer16(c)
  2065. }
  2066. panic("unsupported container type")
  2067. }
  2068. func (rc *runContainer16) iandNot(a container) container {
  2069. switch c := a.(type) {
  2070. case *arrayContainer:
  2071. return rc.iandNotArray(c)
  2072. case *bitmapContainer:
  2073. return rc.iandNotBitmap(c)
  2074. case *runContainer16:
  2075. return rc.iandNotRunContainer16(c)
  2076. }
  2077. panic("unsupported container type")
  2078. }
  2079. // flip the values in the range [firstOfRange,endx)
  2080. func (rc *runContainer16) inot(firstOfRange, endx int) container {
  2081. if firstOfRange > endx {
  2082. panic(fmt.Sprintf("invalid %v = endx > firstOfRange = %v", endx, firstOfRange))
  2083. }
  2084. if firstOfRange > endx {
  2085. return rc
  2086. }
  2087. // TODO: minimize copies, do it all inplace; not() makes a copy.
  2088. rc = rc.Not(firstOfRange, endx)
  2089. return rc
  2090. }
  2091. func (rc *runContainer16) rank(x uint16) int {
  2092. n := int(len(rc.iv))
  2093. xx := int(x)
  2094. w, already, _ := rc.search(xx)
  2095. if w < 0 {
  2096. return 0
  2097. }
  2098. if !already && w == n-1 {
  2099. return rc.getCardinality()
  2100. }
  2101. var rnk int
  2102. if !already {
  2103. for i := int(0); i <= w; i++ {
  2104. rnk += rc.iv[i].runlen()
  2105. }
  2106. return int(rnk)
  2107. }
  2108. for i := int(0); i < w; i++ {
  2109. rnk += rc.iv[i].runlen()
  2110. }
  2111. rnk += int(x-rc.iv[w].start) + 1
  2112. return int(rnk)
  2113. }
  2114. func (rc *runContainer16) selectInt(x uint16) int {
  2115. var offset int
  2116. for k := range rc.iv {
  2117. nextOffset := offset + rc.iv[k].runlen()
  2118. if nextOffset > int(x) {
  2119. return int(int(rc.iv[k].start) + (int(x) - offset))
  2120. }
  2121. offset = nextOffset
  2122. }
  2123. panic("cannot select x")
  2124. }
  2125. func (rc *runContainer16) andNotRunContainer16(b *runContainer16) container {
  2126. return rc.AndNotRunContainer16(b)
  2127. }
  2128. func (rc *runContainer16) andNotArray(ac *arrayContainer) container {
  2129. rcb := rc.toBitmapContainer()
  2130. acb := ac.toBitmapContainer()
  2131. return rcb.andNotBitmap(acb)
  2132. }
  2133. func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container {
  2134. rcb := rc.toBitmapContainer()
  2135. return rcb.andNotBitmap(bc)
  2136. }
  2137. func (rc *runContainer16) toBitmapContainer() *bitmapContainer {
  2138. bc := newBitmapContainer()
  2139. for i := range rc.iv {
  2140. bc.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
  2141. }
  2142. bc.computeCardinality()
  2143. return bc
  2144. }
  2145. func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container {
  2146. rcb := rc.toBitmapContainer()
  2147. x2b := x2.toBitmapContainer()
  2148. rcb.iandNotBitmapSurely(x2b)
  2149. // TODO: check size and optimize the return value
  2150. // TODO: is inplace modification really required? If not, elide the copy.
  2151. rc2 := newRunContainer16FromBitmapContainer(rcb)
  2152. *rc = *rc2
  2153. return rc
  2154. }
  2155. func (rc *runContainer16) iandNotArray(ac *arrayContainer) container {
  2156. rcb := rc.toBitmapContainer()
  2157. acb := ac.toBitmapContainer()
  2158. rcb.iandNotBitmapSurely(acb)
  2159. // TODO: check size and optimize the return value
  2160. // TODO: is inplace modification really required? If not, elide the copy.
  2161. rc2 := newRunContainer16FromBitmapContainer(rcb)
  2162. *rc = *rc2
  2163. return rc
  2164. }
  2165. func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container {
  2166. rcb := rc.toBitmapContainer()
  2167. rcb.iandNotBitmapSurely(bc)
  2168. // TODO: check size and optimize the return value
  2169. // TODO: is inplace modification really required? If not, elide the copy.
  2170. rc2 := newRunContainer16FromBitmapContainer(rcb)
  2171. *rc = *rc2
  2172. return rc
  2173. }
  2174. func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container {
  2175. rcb := rc.toBitmapContainer()
  2176. x2b := x2.toBitmapContainer()
  2177. return rcb.xorBitmap(x2b)
  2178. }
  2179. func (rc *runContainer16) xorArray(ac *arrayContainer) container {
  2180. rcb := rc.toBitmapContainer()
  2181. acb := ac.toBitmapContainer()
  2182. return rcb.xorBitmap(acb)
  2183. }
  2184. func (rc *runContainer16) xorBitmap(bc *bitmapContainer) container {
  2185. rcb := rc.toBitmapContainer()
  2186. return rcb.xorBitmap(bc)
  2187. }
  2188. // convert to bitmap or array *if needed*
  2189. func (rc *runContainer16) toEfficientContainer() container {
  2190. sizeAsRunContainer := rc.getSizeInBytes()
  2191. sizeAsBitmapContainer := bitmapContainerSizeInBytes()
  2192. card := rc.getCardinality()
  2193. sizeAsArrayContainer := arrayContainerSizeInBytes(card)
  2194. if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
  2195. return rc
  2196. }
  2197. if card <= arrayDefaultMaxSize {
  2198. return rc.toArrayContainer()
  2199. }
  2200. bc := newBitmapContainerFromRun(rc)
  2201. return bc
  2202. }
  2203. func (rc *runContainer16) toArrayContainer() *arrayContainer {
  2204. ac := newArrayContainer()
  2205. for i := range rc.iv {
  2206. ac.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
  2207. }
  2208. return ac
  2209. }
  2210. func newRunContainer16FromContainer(c container) *runContainer16 {
  2211. switch x := c.(type) {
  2212. case *runContainer16:
  2213. return x.Clone()
  2214. case *arrayContainer:
  2215. return newRunContainer16FromArray(x)
  2216. case *bitmapContainer:
  2217. return newRunContainer16FromBitmapContainer(x)
  2218. }
  2219. panic("unsupported container type")
  2220. }
  2221. // And finds the intersection of rc and b.
  2222. func (rc *runContainer16) And(b *Bitmap) *Bitmap {
  2223. out := NewBitmap()
  2224. for _, p := range rc.iv {
  2225. plast := p.last()
  2226. for i := p.start; i <= plast; i++ {
  2227. if b.Contains(uint32(i)) {
  2228. out.Add(uint32(i))
  2229. }
  2230. }
  2231. }
  2232. return out
  2233. }
  2234. // Xor returns the exclusive-or of rc and b.
  2235. func (rc *runContainer16) Xor(b *Bitmap) *Bitmap {
  2236. out := b.Clone()
  2237. for _, p := range rc.iv {
  2238. plast := p.last()
  2239. for v := p.start; v <= plast; v++ {
  2240. w := uint32(v)
  2241. if out.Contains(w) {
  2242. out.RemoveRange(uint64(w), uint64(w+1))
  2243. } else {
  2244. out.Add(w)
  2245. }
  2246. }
  2247. }
  2248. return out
  2249. }
  2250. // Or returns the union of rc and b.
  2251. func (rc *runContainer16) Or(b *Bitmap) *Bitmap {
  2252. out := b.Clone()
  2253. for _, p := range rc.iv {
  2254. plast := p.last()
  2255. for v := p.start; v <= plast; v++ {
  2256. out.Add(uint32(v))
  2257. }
  2258. }
  2259. return out
  2260. }
  2261. // serializedSizeInBytes returns the number of bytes of memory
  2262. // required by this runContainer16. This is for the
  2263. // Roaring format, as specified https://github.com/RoaringBitmap/RoaringFormatSpec/
  2264. func (rc *runContainer16) serializedSizeInBytes() int {
  2265. // number of runs in one uint16, then each run
  2266. // needs two more uint16
  2267. return 2 + len(rc.iv)*4
  2268. }
  2269. func (rc *runContainer16) addOffset(x uint16) (container, container) {
  2270. var low, high *runContainer16
  2271. if len(rc.iv) == 0 {
  2272. return nil, nil
  2273. }
  2274. first := uint32(rc.iv[0].start) + uint32(x)
  2275. if highbits(first) == 0 {
  2276. // Some elements will fall into low part, allocate a container.
  2277. // Checking the first one is enough because they are ordered.
  2278. low = newRunContainer16()
  2279. }
  2280. last := uint32(rc.iv[len(rc.iv)-1].start)
  2281. last += uint32(rc.iv[len(rc.iv)-1].length)
  2282. last += uint32(x)
  2283. if highbits(last) > 0 {
  2284. // Some elements will fall into high part, allocate a container.
  2285. // Checking the last one is enough because they are ordered.
  2286. high = newRunContainer16()
  2287. }
  2288. for _, iv := range rc.iv {
  2289. val := int(iv.start) + int(x)
  2290. finalVal := int(val) + int(iv.length)
  2291. if val <= 0xffff {
  2292. if finalVal <= 0xffff {
  2293. low.iv = append(low.iv, interval16{uint16(val), iv.length})
  2294. } else {
  2295. low.iv = append(low.iv, interval16{uint16(val), uint16(0xffff - val)})
  2296. high.iv = append(high.iv, interval16{uint16(0), uint16(finalVal & 0xffff)})
  2297. }
  2298. } else {
  2299. high.iv = append(high.iv, interval16{uint16(val & 0xffff), iv.length})
  2300. }
  2301. }
  2302. // Ensure proper nil interface.
  2303. if low == nil {
  2304. return nil, high
  2305. }
  2306. if high == nil {
  2307. return low, nil
  2308. }
  2309. return low, high
  2310. }