intern.go 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. // Copyright 2020 Brad Fitzpatrick. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package intern lets you make smaller comparable values by boxing
  5. // a larger comparable value (such as a 16 byte string header) down
  6. // into a globally unique 8 byte pointer.
  7. //
  8. // The globally unique pointers are garbage collected with weak
  9. // references and finalizers. This package hides that.
  10. //
  11. // The GitHub repo is https://github.com/go4org/intern
  12. package intern // import "go4.org/intern"
  13. import (
  14. "os"
  15. "runtime"
  16. "strconv"
  17. "sync"
  18. "unsafe"
  19. _ "go4.org/unsafe/assume-no-moving-gc"
  20. )
  21. // A Value pointer is the handle to an underlying comparable value.
  22. // See func Get for how Value pointers may be used.
  23. type Value struct {
  24. _ [0]func() // prevent people from accidentally using value type as comparable
  25. cmpVal interface{}
  26. // resurrected is guarded by mu (for all instances of Value).
  27. // It is set true whenever v is synthesized from a uintptr.
  28. resurrected bool
  29. }
  30. // Get returns the comparable value passed to the Get func
  31. // that returned v.
  32. func (v *Value) Get() interface{} { return v.cmpVal }
  33. // key is a key in our global value map.
  34. // It contains type-specialized fields to avoid allocations
  35. // when converting common types to empty interfaces.
  36. type key struct {
  37. s string
  38. cmpVal interface{}
  39. // isString reports whether key contains a string.
  40. // Without it, the zero value of key is ambiguous.
  41. isString bool
  42. }
  43. // keyFor returns a key to use with cmpVal.
  44. func keyFor(cmpVal interface{}) key {
  45. if s, ok := cmpVal.(string); ok {
  46. return key{s: s, isString: true}
  47. }
  48. return key{cmpVal: cmpVal}
  49. }
  50. // Value returns a *Value built from k.
  51. func (k key) Value() *Value {
  52. if k.isString {
  53. return &Value{cmpVal: k.s}
  54. }
  55. return &Value{cmpVal: k.cmpVal}
  56. }
  57. var (
  58. // mu guards valMap, a weakref map of *Value by underlying value.
  59. // It also guards the resurrected field of all *Values.
  60. mu sync.Mutex
  61. valMap = map[key]uintptr{} // to uintptr(*Value)
  62. valSafe = safeMap() // non-nil in safe+leaky mode
  63. )
  64. // safeMap returns a non-nil map if we're in safe-but-leaky mode,
  65. // as controlled by GO4_INTERN_SAFE_BUT_LEAKY.
  66. func safeMap() map[key]*Value {
  67. if v, _ := strconv.ParseBool(os.Getenv("GO4_INTERN_SAFE_BUT_LEAKY")); v {
  68. return map[key]*Value{}
  69. }
  70. return nil
  71. }
  72. // Get returns a pointer representing the comparable value cmpVal.
  73. //
  74. // The returned pointer will be the same for Get(v) and Get(v2)
  75. // if and only if v == v2, and can be used as a map key.
  76. func Get(cmpVal interface{}) *Value {
  77. return get(keyFor(cmpVal))
  78. }
  79. // GetByString is identical to Get, except that it is specialized for strings.
  80. // This avoids an allocation from putting a string into an interface{}
  81. // to pass as an argument to Get.
  82. func GetByString(s string) *Value {
  83. return get(key{s: s, isString: true})
  84. }
  85. // We play unsafe games that violate Go's rules (and assume a non-moving
  86. // collector). So we quiet Go here.
  87. // See the comment below Get for more implementation details.
  88. //go:nocheckptr
  89. func get(k key) *Value {
  90. mu.Lock()
  91. defer mu.Unlock()
  92. var v *Value
  93. if valSafe != nil {
  94. v = valSafe[k]
  95. } else if addr, ok := valMap[k]; ok {
  96. v = (*Value)((unsafe.Pointer)(addr))
  97. v.resurrected = true
  98. }
  99. if v != nil {
  100. return v
  101. }
  102. v = k.Value()
  103. if valSafe != nil {
  104. valSafe[k] = v
  105. } else {
  106. // SetFinalizer before uintptr conversion (theoretical concern;
  107. // see https://github.com/go4org/intern/issues/13)
  108. runtime.SetFinalizer(v, finalize)
  109. valMap[k] = uintptr(unsafe.Pointer(v))
  110. }
  111. return v
  112. }
  113. func finalize(v *Value) {
  114. mu.Lock()
  115. defer mu.Unlock()
  116. if v.resurrected {
  117. // We lost the race. Somebody resurrected it while we
  118. // were about to finalize it. Try again next round.
  119. v.resurrected = false
  120. runtime.SetFinalizer(v, finalize)
  121. return
  122. }
  123. delete(valMap, keyFor(v.cmpVal))
  124. }
  125. // Interning is simple if you don't require that unused values be
  126. // garbage collectable. But we do require that; we don't want to be
  127. // DOS vector. We do this by using a uintptr to hide the pointer from
  128. // the garbage collector, and using a finalizer to eliminate the
  129. // pointer when no other code is using it.
  130. //
  131. // The obvious implementation of this is to use a
  132. // map[interface{}]uintptr-of-*interface{}, and set up a finalizer to
  133. // delete from the map. Unfortunately, this is racy. Because pointers
  134. // are being created in violation of Go's unsafety rules, it's
  135. // possible to create a pointer to a value concurrently with the GC
  136. // concluding that the value can be collected. There are other races
  137. // that break the equality invariant as well, but the use-after-free
  138. // will cause a runtime crash.
  139. //
  140. // To make this work, the finalizer needs to know that no references
  141. // have been unsafely created since the finalizer was set up. To do
  142. // this, values carry a "resurrected" sentinel, which gets set
  143. // whenever a pointer is unsafely created. If the finalizer encounters
  144. // the sentinel, it clears the sentinel and delays collection for one
  145. // additional GC cycle, by re-installing itself as finalizer. This
  146. // ensures that the unsafely created pointer is visible to the GC, and
  147. // will correctly prevent collection.
  148. //
  149. // This technique does mean that interned values that get reused take
  150. // at least 3 GC cycles to fully collect (1 to clear the sentinel, 1
  151. // to clean up the unsafe map, 1 to be actually deleted).
  152. //
  153. // @ianlancetaylor commented in
  154. // https://github.com/golang/go/issues/41303#issuecomment-717401656
  155. // that it is possible to implement weak references in terms of
  156. // finalizers without unsafe. Unfortunately, the approach he outlined
  157. // does not work here, for two reasons. First, there is no way to
  158. // construct a strong pointer out of a weak pointer; our map stores
  159. // weak pointers, but we must return strong pointers to callers.
  160. // Second, and more fundamentally, we must return not just _a_ strong
  161. // pointer to callers, but _the same_ strong pointer to callers. In
  162. // order to return _the same_ strong pointer to callers, we must track
  163. // it, which is exactly what we cannot do with strong pointers.
  164. //
  165. // See https://github.com/inetaf/netaddr/issues/53 for more
  166. // discussion, and https://github.com/go4org/intern/issues/2 for an
  167. // illustration of the subtleties at play.