bucket.go 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791
  1. package bbolt
  2. import (
  3. "bytes"
  4. "fmt"
  5. "unsafe"
  6. )
  7. const (
  8. // MaxKeySize is the maximum length of a key, in bytes.
  9. MaxKeySize = 32768
  10. // MaxValueSize is the maximum length of a value, in bytes.
  11. MaxValueSize = (1 << 31) - 2
  12. )
  13. const bucketHeaderSize = int(unsafe.Sizeof(bucket{}))
  14. const (
  15. minFillPercent = 0.1
  16. maxFillPercent = 1.0
  17. )
  18. // DefaultFillPercent is the percentage that split pages are filled.
  19. // This value can be changed by setting Bucket.FillPercent.
  20. const DefaultFillPercent = 0.5
  21. // Bucket represents a collection of key/value pairs inside the database.
  22. type Bucket struct {
  23. *bucket
  24. tx *Tx // the associated transaction
  25. buckets map[string]*Bucket // subbucket cache
  26. page *page // inline page reference
  27. rootNode *node // materialized node for the root page.
  28. nodes map[pgid]*node // node cache
  29. // Sets the threshold for filling nodes when they split. By default,
  30. // the bucket will fill to 50% but it can be useful to increase this
  31. // amount if you know that your write workloads are mostly append-only.
  32. //
  33. // This is non-persisted across transactions so it must be set in every Tx.
  34. FillPercent float64
  35. }
  36. // bucket represents the on-file representation of a bucket.
  37. // This is stored as the "value" of a bucket key. If the bucket is small enough,
  38. // then its root page can be stored inline in the "value", after the bucket
  39. // header. In the case of inline buckets, the "root" will be 0.
  40. type bucket struct {
  41. root pgid // page id of the bucket's root-level page
  42. sequence uint64 // monotonically incrementing, used by NextSequence()
  43. }
  44. // newBucket returns a new bucket associated with a transaction.
  45. func newBucket(tx *Tx) Bucket {
  46. var b = Bucket{tx: tx, FillPercent: DefaultFillPercent}
  47. if tx.writable {
  48. b.buckets = make(map[string]*Bucket)
  49. b.nodes = make(map[pgid]*node)
  50. }
  51. return b
  52. }
  53. // Tx returns the tx of the bucket.
  54. func (b *Bucket) Tx() *Tx {
  55. return b.tx
  56. }
  57. // Root returns the root of the bucket.
  58. func (b *Bucket) Root() pgid {
  59. return b.root
  60. }
  61. // Writable returns whether the bucket is writable.
  62. func (b *Bucket) Writable() bool {
  63. return b.tx.writable
  64. }
  65. // Cursor creates a cursor associated with the bucket.
  66. // The cursor is only valid as long as the transaction is open.
  67. // Do not use a cursor after the transaction is closed.
  68. func (b *Bucket) Cursor() *Cursor {
  69. // Update transaction statistics.
  70. b.tx.stats.IncCursorCount(1)
  71. // Allocate and return a cursor.
  72. return &Cursor{
  73. bucket: b,
  74. stack: make([]elemRef, 0),
  75. }
  76. }
  77. // Bucket retrieves a nested bucket by name.
  78. // Returns nil if the bucket does not exist.
  79. // The bucket instance is only valid for the lifetime of the transaction.
  80. func (b *Bucket) Bucket(name []byte) *Bucket {
  81. if b.buckets != nil {
  82. if child := b.buckets[string(name)]; child != nil {
  83. return child
  84. }
  85. }
  86. // Move cursor to key.
  87. c := b.Cursor()
  88. k, v, flags := c.seek(name)
  89. // Return nil if the key doesn't exist or it is not a bucket.
  90. if !bytes.Equal(name, k) || (flags&bucketLeafFlag) == 0 {
  91. return nil
  92. }
  93. // Otherwise create a bucket and cache it.
  94. var child = b.openBucket(v)
  95. if b.buckets != nil {
  96. b.buckets[string(name)] = child
  97. }
  98. return child
  99. }
  100. // Helper method that re-interprets a sub-bucket value
  101. // from a parent into a Bucket
  102. func (b *Bucket) openBucket(value []byte) *Bucket {
  103. var child = newBucket(b.tx)
  104. // Unaligned access requires a copy to be made.
  105. const unalignedMask = unsafe.Alignof(struct {
  106. bucket
  107. page
  108. }{}) - 1
  109. unaligned := uintptr(unsafe.Pointer(&value[0]))&unalignedMask != 0
  110. if unaligned {
  111. value = cloneBytes(value)
  112. }
  113. // If this is a writable transaction then we need to copy the bucket entry.
  114. // Read-only transactions can point directly at the mmap entry.
  115. if b.tx.writable && !unaligned {
  116. child.bucket = &bucket{}
  117. *child.bucket = *(*bucket)(unsafe.Pointer(&value[0]))
  118. } else {
  119. child.bucket = (*bucket)(unsafe.Pointer(&value[0]))
  120. }
  121. // Save a reference to the inline page if the bucket is inline.
  122. if child.root == 0 {
  123. child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
  124. }
  125. return &child
  126. }
  127. // CreateBucket creates a new bucket at the given key and returns the new bucket.
  128. // Returns an error if the key already exists, if the bucket name is blank, or if the bucket name is too long.
  129. // The bucket instance is only valid for the lifetime of the transaction.
  130. func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
  131. if b.tx.db == nil {
  132. return nil, ErrTxClosed
  133. } else if !b.tx.writable {
  134. return nil, ErrTxNotWritable
  135. } else if len(key) == 0 {
  136. return nil, ErrBucketNameRequired
  137. }
  138. // Move cursor to correct position.
  139. c := b.Cursor()
  140. k, _, flags := c.seek(key)
  141. // Return an error if there is an existing key.
  142. if bytes.Equal(key, k) {
  143. if (flags & bucketLeafFlag) != 0 {
  144. return nil, ErrBucketExists
  145. }
  146. return nil, ErrIncompatibleValue
  147. }
  148. // Create empty, inline bucket.
  149. var bucket = Bucket{
  150. bucket: &bucket{},
  151. rootNode: &node{isLeaf: true},
  152. FillPercent: DefaultFillPercent,
  153. }
  154. var value = bucket.write()
  155. // Insert into node.
  156. key = cloneBytes(key)
  157. c.node().put(key, key, value, 0, bucketLeafFlag)
  158. // Since subbuckets are not allowed on inline buckets, we need to
  159. // dereference the inline page, if it exists. This will cause the bucket
  160. // to be treated as a regular, non-inline bucket for the rest of the tx.
  161. b.page = nil
  162. return b.Bucket(key), nil
  163. }
  164. // CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it.
  165. // Returns an error if the bucket name is blank, or if the bucket name is too long.
  166. // The bucket instance is only valid for the lifetime of the transaction.
  167. func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
  168. child, err := b.CreateBucket(key)
  169. if err == ErrBucketExists {
  170. return b.Bucket(key), nil
  171. } else if err != nil {
  172. return nil, err
  173. }
  174. return child, nil
  175. }
  176. // DeleteBucket deletes a bucket at the given key.
  177. // Returns an error if the bucket does not exist, or if the key represents a non-bucket value.
  178. func (b *Bucket) DeleteBucket(key []byte) error {
  179. if b.tx.db == nil {
  180. return ErrTxClosed
  181. } else if !b.Writable() {
  182. return ErrTxNotWritable
  183. }
  184. // Move cursor to correct position.
  185. c := b.Cursor()
  186. k, _, flags := c.seek(key)
  187. // Return an error if bucket doesn't exist or is not a bucket.
  188. if !bytes.Equal(key, k) {
  189. return ErrBucketNotFound
  190. } else if (flags & bucketLeafFlag) == 0 {
  191. return ErrIncompatibleValue
  192. }
  193. // Recursively delete all child buckets.
  194. child := b.Bucket(key)
  195. err := child.ForEachBucket(func(k []byte) error {
  196. if err := child.DeleteBucket(k); err != nil {
  197. return fmt.Errorf("delete bucket: %s", err)
  198. }
  199. return nil
  200. })
  201. if err != nil {
  202. return err
  203. }
  204. // Remove cached copy.
  205. delete(b.buckets, string(key))
  206. // Release all bucket pages to freelist.
  207. child.nodes = nil
  208. child.rootNode = nil
  209. child.free()
  210. // Delete the node if we have a matching key.
  211. c.node().del(key)
  212. return nil
  213. }
  214. // Get retrieves the value for a key in the bucket.
  215. // Returns a nil value if the key does not exist or if the key is a nested bucket.
  216. // The returned value is only valid for the life of the transaction.
  217. func (b *Bucket) Get(key []byte) []byte {
  218. k, v, flags := b.Cursor().seek(key)
  219. // Return nil if this is a bucket.
  220. if (flags & bucketLeafFlag) != 0 {
  221. return nil
  222. }
  223. // If our target node isn't the same key as what's passed in then return nil.
  224. if !bytes.Equal(key, k) {
  225. return nil
  226. }
  227. return v
  228. }
  229. // Put sets the value for a key in the bucket.
  230. // If the key exist then its previous value will be overwritten.
  231. // Supplied value must remain valid for the life of the transaction.
  232. // Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large.
  233. func (b *Bucket) Put(key []byte, value []byte) error {
  234. if b.tx.db == nil {
  235. return ErrTxClosed
  236. } else if !b.Writable() {
  237. return ErrTxNotWritable
  238. } else if len(key) == 0 {
  239. return ErrKeyRequired
  240. } else if len(key) > MaxKeySize {
  241. return ErrKeyTooLarge
  242. } else if int64(len(value)) > MaxValueSize {
  243. return ErrValueTooLarge
  244. }
  245. // Move cursor to correct position.
  246. c := b.Cursor()
  247. k, _, flags := c.seek(key)
  248. // Return an error if there is an existing key with a bucket value.
  249. if bytes.Equal(key, k) && (flags&bucketLeafFlag) != 0 {
  250. return ErrIncompatibleValue
  251. }
  252. // Insert into node.
  253. key = cloneBytes(key)
  254. c.node().put(key, key, value, 0, 0)
  255. return nil
  256. }
  257. // Delete removes a key from the bucket.
  258. // If the key does not exist then nothing is done and a nil error is returned.
  259. // Returns an error if the bucket was created from a read-only transaction.
  260. func (b *Bucket) Delete(key []byte) error {
  261. if b.tx.db == nil {
  262. return ErrTxClosed
  263. } else if !b.Writable() {
  264. return ErrTxNotWritable
  265. }
  266. // Move cursor to correct position.
  267. c := b.Cursor()
  268. k, _, flags := c.seek(key)
  269. // Return nil if the key doesn't exist.
  270. if !bytes.Equal(key, k) {
  271. return nil
  272. }
  273. // Return an error if there is already existing bucket value.
  274. if (flags & bucketLeafFlag) != 0 {
  275. return ErrIncompatibleValue
  276. }
  277. // Delete the node if we have a matching key.
  278. c.node().del(key)
  279. return nil
  280. }
  281. // Sequence returns the current integer for the bucket without incrementing it.
  282. func (b *Bucket) Sequence() uint64 { return b.bucket.sequence }
  283. // SetSequence updates the sequence number for the bucket.
  284. func (b *Bucket) SetSequence(v uint64) error {
  285. if b.tx.db == nil {
  286. return ErrTxClosed
  287. } else if !b.Writable() {
  288. return ErrTxNotWritable
  289. }
  290. // Materialize the root node if it hasn't been already so that the
  291. // bucket will be saved during commit.
  292. if b.rootNode == nil {
  293. _ = b.node(b.root, nil)
  294. }
  295. // Set the sequence.
  296. b.bucket.sequence = v
  297. return nil
  298. }
  299. // NextSequence returns an autoincrementing integer for the bucket.
  300. func (b *Bucket) NextSequence() (uint64, error) {
  301. if b.tx.db == nil {
  302. return 0, ErrTxClosed
  303. } else if !b.Writable() {
  304. return 0, ErrTxNotWritable
  305. }
  306. // Materialize the root node if it hasn't been already so that the
  307. // bucket will be saved during commit.
  308. if b.rootNode == nil {
  309. _ = b.node(b.root, nil)
  310. }
  311. // Increment and return the sequence.
  312. b.bucket.sequence++
  313. return b.bucket.sequence, nil
  314. }
  315. // ForEach executes a function for each key/value pair in a bucket.
  316. // Because ForEach uses a Cursor, the iteration over keys is in lexicographical order.
  317. // If the provided function returns an error then the iteration is stopped and
  318. // the error is returned to the caller. The provided function must not modify
  319. // the bucket; this will result in undefined behavior.
  320. func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
  321. if b.tx.db == nil {
  322. return ErrTxClosed
  323. }
  324. c := b.Cursor()
  325. for k, v := c.First(); k != nil; k, v = c.Next() {
  326. if err := fn(k, v); err != nil {
  327. return err
  328. }
  329. }
  330. return nil
  331. }
  332. func (b *Bucket) ForEachBucket(fn func(k []byte) error) error {
  333. if b.tx.db == nil {
  334. return ErrTxClosed
  335. }
  336. c := b.Cursor()
  337. for k, _, flags := c.first(); k != nil; k, _, flags = c.next() {
  338. if flags&bucketLeafFlag != 0 {
  339. if err := fn(k); err != nil {
  340. return err
  341. }
  342. }
  343. }
  344. return nil
  345. }
  346. // Stats returns stats on a bucket.
  347. func (b *Bucket) Stats() BucketStats {
  348. var s, subStats BucketStats
  349. pageSize := b.tx.db.pageSize
  350. s.BucketN += 1
  351. if b.root == 0 {
  352. s.InlineBucketN += 1
  353. }
  354. b.forEachPage(func(p *page, depth int, pgstack []pgid) {
  355. if (p.flags & leafPageFlag) != 0 {
  356. s.KeyN += int(p.count)
  357. // used totals the used bytes for the page
  358. used := pageHeaderSize
  359. if p.count != 0 {
  360. // If page has any elements, add all element headers.
  361. used += leafPageElementSize * uintptr(p.count-1)
  362. // Add all element key, value sizes.
  363. // The computation takes advantage of the fact that the position
  364. // of the last element's key/value equals to the total of the sizes
  365. // of all previous elements' keys and values.
  366. // It also includes the last element's header.
  367. lastElement := p.leafPageElement(p.count - 1)
  368. used += uintptr(lastElement.pos + lastElement.ksize + lastElement.vsize)
  369. }
  370. if b.root == 0 {
  371. // For inlined bucket just update the inline stats
  372. s.InlineBucketInuse += int(used)
  373. } else {
  374. // For non-inlined bucket update all the leaf stats
  375. s.LeafPageN++
  376. s.LeafInuse += int(used)
  377. s.LeafOverflowN += int(p.overflow)
  378. // Collect stats from sub-buckets.
  379. // Do that by iterating over all element headers
  380. // looking for the ones with the bucketLeafFlag.
  381. for i := uint16(0); i < p.count; i++ {
  382. e := p.leafPageElement(i)
  383. if (e.flags & bucketLeafFlag) != 0 {
  384. // For any bucket element, open the element value
  385. // and recursively call Stats on the contained bucket.
  386. subStats.Add(b.openBucket(e.value()).Stats())
  387. }
  388. }
  389. }
  390. } else if (p.flags & branchPageFlag) != 0 {
  391. s.BranchPageN++
  392. lastElement := p.branchPageElement(p.count - 1)
  393. // used totals the used bytes for the page
  394. // Add header and all element headers.
  395. used := pageHeaderSize + (branchPageElementSize * uintptr(p.count-1))
  396. // Add size of all keys and values.
  397. // Again, use the fact that last element's position equals to
  398. // the total of key, value sizes of all previous elements.
  399. used += uintptr(lastElement.pos + lastElement.ksize)
  400. s.BranchInuse += int(used)
  401. s.BranchOverflowN += int(p.overflow)
  402. }
  403. // Keep track of maximum page depth.
  404. if depth+1 > s.Depth {
  405. s.Depth = depth + 1
  406. }
  407. })
  408. // Alloc stats can be computed from page counts and pageSize.
  409. s.BranchAlloc = (s.BranchPageN + s.BranchOverflowN) * pageSize
  410. s.LeafAlloc = (s.LeafPageN + s.LeafOverflowN) * pageSize
  411. // Add the max depth of sub-buckets to get total nested depth.
  412. s.Depth += subStats.Depth
  413. // Add the stats for all sub-buckets
  414. s.Add(subStats)
  415. return s
  416. }
  417. // forEachPage iterates over every page in a bucket, including inline pages.
  418. func (b *Bucket) forEachPage(fn func(*page, int, []pgid)) {
  419. // If we have an inline page then just use that.
  420. if b.page != nil {
  421. fn(b.page, 0, []pgid{b.root})
  422. return
  423. }
  424. // Otherwise traverse the page hierarchy.
  425. b.tx.forEachPage(b.root, fn)
  426. }
  427. // forEachPageNode iterates over every page (or node) in a bucket.
  428. // This also includes inline pages.
  429. func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) {
  430. // If we have an inline page or root node then just use that.
  431. if b.page != nil {
  432. fn(b.page, nil, 0)
  433. return
  434. }
  435. b._forEachPageNode(b.root, 0, fn)
  436. }
  437. func (b *Bucket) _forEachPageNode(pgId pgid, depth int, fn func(*page, *node, int)) {
  438. var p, n = b.pageNode(pgId)
  439. // Execute function.
  440. fn(p, n, depth)
  441. // Recursively loop over children.
  442. if p != nil {
  443. if (p.flags & branchPageFlag) != 0 {
  444. for i := 0; i < int(p.count); i++ {
  445. elem := p.branchPageElement(uint16(i))
  446. b._forEachPageNode(elem.pgid, depth+1, fn)
  447. }
  448. }
  449. } else {
  450. if !n.isLeaf {
  451. for _, inode := range n.inodes {
  452. b._forEachPageNode(inode.pgid, depth+1, fn)
  453. }
  454. }
  455. }
  456. }
  457. // spill writes all the nodes for this bucket to dirty pages.
  458. func (b *Bucket) spill() error {
  459. // Spill all child buckets first.
  460. for name, child := range b.buckets {
  461. // If the child bucket is small enough and it has no child buckets then
  462. // write it inline into the parent bucket's page. Otherwise spill it
  463. // like a normal bucket and make the parent value a pointer to the page.
  464. var value []byte
  465. if child.inlineable() {
  466. child.free()
  467. value = child.write()
  468. } else {
  469. if err := child.spill(); err != nil {
  470. return err
  471. }
  472. // Update the child bucket header in this bucket.
  473. value = make([]byte, unsafe.Sizeof(bucket{}))
  474. var bucket = (*bucket)(unsafe.Pointer(&value[0]))
  475. *bucket = *child.bucket
  476. }
  477. // Skip writing the bucket if there are no materialized nodes.
  478. if child.rootNode == nil {
  479. continue
  480. }
  481. // Update parent node.
  482. var c = b.Cursor()
  483. k, _, flags := c.seek([]byte(name))
  484. if !bytes.Equal([]byte(name), k) {
  485. panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k))
  486. }
  487. if flags&bucketLeafFlag == 0 {
  488. panic(fmt.Sprintf("unexpected bucket header flag: %x", flags))
  489. }
  490. c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag)
  491. }
  492. // Ignore if there's not a materialized root node.
  493. if b.rootNode == nil {
  494. return nil
  495. }
  496. // Spill nodes.
  497. if err := b.rootNode.spill(); err != nil {
  498. return err
  499. }
  500. b.rootNode = b.rootNode.root()
  501. // Update the root node for this bucket.
  502. if b.rootNode.pgid >= b.tx.meta.pgid {
  503. panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid))
  504. }
  505. b.root = b.rootNode.pgid
  506. return nil
  507. }
  508. // inlineable returns true if a bucket is small enough to be written inline
  509. // and if it contains no subbuckets. Otherwise returns false.
  510. func (b *Bucket) inlineable() bool {
  511. var n = b.rootNode
  512. // Bucket must only contain a single leaf node.
  513. if n == nil || !n.isLeaf {
  514. return false
  515. }
  516. // Bucket is not inlineable if it contains subbuckets or if it goes beyond
  517. // our threshold for inline bucket size.
  518. var size = pageHeaderSize
  519. for _, inode := range n.inodes {
  520. size += leafPageElementSize + uintptr(len(inode.key)) + uintptr(len(inode.value))
  521. if inode.flags&bucketLeafFlag != 0 {
  522. return false
  523. } else if size > b.maxInlineBucketSize() {
  524. return false
  525. }
  526. }
  527. return true
  528. }
  529. // Returns the maximum total size of a bucket to make it a candidate for inlining.
  530. func (b *Bucket) maxInlineBucketSize() uintptr {
  531. return uintptr(b.tx.db.pageSize / 4)
  532. }
  533. // write allocates and writes a bucket to a byte slice.
  534. func (b *Bucket) write() []byte {
  535. // Allocate the appropriate size.
  536. var n = b.rootNode
  537. var value = make([]byte, bucketHeaderSize+n.size())
  538. // Write a bucket header.
  539. var bucket = (*bucket)(unsafe.Pointer(&value[0]))
  540. *bucket = *b.bucket
  541. // Convert byte slice to a fake page and write the root node.
  542. var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
  543. n.write(p)
  544. return value
  545. }
  546. // rebalance attempts to balance all nodes.
  547. func (b *Bucket) rebalance() {
  548. for _, n := range b.nodes {
  549. n.rebalance()
  550. }
  551. for _, child := range b.buckets {
  552. child.rebalance()
  553. }
  554. }
  555. // node creates a node from a page and associates it with a given parent.
  556. func (b *Bucket) node(pgId pgid, parent *node) *node {
  557. _assert(b.nodes != nil, "nodes map expected")
  558. // Retrieve node if it's already been created.
  559. if n := b.nodes[pgId]; n != nil {
  560. return n
  561. }
  562. // Otherwise create a node and cache it.
  563. n := &node{bucket: b, parent: parent}
  564. if parent == nil {
  565. b.rootNode = n
  566. } else {
  567. parent.children = append(parent.children, n)
  568. }
  569. // Use the inline page if this is an inline bucket.
  570. var p = b.page
  571. if p == nil {
  572. p = b.tx.page(pgId)
  573. }
  574. // Read the page into the node and cache it.
  575. n.read(p)
  576. b.nodes[pgId] = n
  577. // Update statistics.
  578. b.tx.stats.IncNodeCount(1)
  579. return n
  580. }
  581. // free recursively frees all pages in the bucket.
  582. func (b *Bucket) free() {
  583. if b.root == 0 {
  584. return
  585. }
  586. var tx = b.tx
  587. b.forEachPageNode(func(p *page, n *node, _ int) {
  588. if p != nil {
  589. tx.db.freelist.free(tx.meta.txid, p)
  590. } else {
  591. n.free()
  592. }
  593. })
  594. b.root = 0
  595. }
  596. // dereference removes all references to the old mmap.
  597. func (b *Bucket) dereference() {
  598. if b.rootNode != nil {
  599. b.rootNode.root().dereference()
  600. }
  601. for _, child := range b.buckets {
  602. child.dereference()
  603. }
  604. }
  605. // pageNode returns the in-memory node, if it exists.
  606. // Otherwise returns the underlying page.
  607. func (b *Bucket) pageNode(id pgid) (*page, *node) {
  608. // Inline buckets have a fake page embedded in their value so treat them
  609. // differently. We'll return the rootNode (if available) or the fake page.
  610. if b.root == 0 {
  611. if id != 0 {
  612. panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id))
  613. }
  614. if b.rootNode != nil {
  615. return nil, b.rootNode
  616. }
  617. return b.page, nil
  618. }
  619. // Check the node cache for non-inline buckets.
  620. if b.nodes != nil {
  621. if n := b.nodes[id]; n != nil {
  622. return nil, n
  623. }
  624. }
  625. // Finally lookup the page from the transaction if no node is materialized.
  626. return b.tx.page(id), nil
  627. }
  628. // BucketStats records statistics about resources used by a bucket.
  629. type BucketStats struct {
  630. // Page count statistics.
  631. BranchPageN int // number of logical branch pages
  632. BranchOverflowN int // number of physical branch overflow pages
  633. LeafPageN int // number of logical leaf pages
  634. LeafOverflowN int // number of physical leaf overflow pages
  635. // Tree statistics.
  636. KeyN int // number of keys/value pairs
  637. Depth int // number of levels in B+tree
  638. // Page size utilization.
  639. BranchAlloc int // bytes allocated for physical branch pages
  640. BranchInuse int // bytes actually used for branch data
  641. LeafAlloc int // bytes allocated for physical leaf pages
  642. LeafInuse int // bytes actually used for leaf data
  643. // Bucket statistics
  644. BucketN int // total number of buckets including the top bucket
  645. InlineBucketN int // total number on inlined buckets
  646. InlineBucketInuse int // bytes used for inlined buckets (also accounted for in LeafInuse)
  647. }
  648. func (s *BucketStats) Add(other BucketStats) {
  649. s.BranchPageN += other.BranchPageN
  650. s.BranchOverflowN += other.BranchOverflowN
  651. s.LeafPageN += other.LeafPageN
  652. s.LeafOverflowN += other.LeafOverflowN
  653. s.KeyN += other.KeyN
  654. if s.Depth < other.Depth {
  655. s.Depth = other.Depth
  656. }
  657. s.BranchAlloc += other.BranchAlloc
  658. s.BranchInuse += other.BranchInuse
  659. s.LeafAlloc += other.LeafAlloc
  660. s.LeafInuse += other.LeafInuse
  661. s.BucketN += other.BucketN
  662. s.InlineBucketN += other.InlineBucketN
  663. s.InlineBucketInuse += other.InlineBucketInuse
  664. }
  665. // cloneBytes returns a copy of a given slice.
  666. func cloneBytes(v []byte) []byte {
  667. var clone = make([]byte, len(v))
  668. copy(clone, v)
  669. return clone
  670. }