decoder.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519
  1. // Copyright 2021 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package pkgbits
  5. import (
  6. "encoding/binary"
  7. "errors"
  8. "fmt"
  9. "go/constant"
  10. "go/token"
  11. "io"
  12. "math/big"
  13. "os"
  14. "runtime"
  15. "strings"
  16. )
// A PkgDecoder provides methods for decoding a package's Unified IR
// export data.
//
// It holds the header information parsed by NewPkgDecoder together
// with the undecoded element payload; individual elements are decoded
// on demand through Decoder values.
type PkgDecoder struct {
	// version is the file format version.
	version Version

	// sync indicates whether the file uses sync markers.
	sync bool

	// pkgPath is the package path for the package to be decoded.
	//
	// TODO(mdempsky): Remove; unneeded since CL 391014.
	pkgPath string

	// elemData is the full data payload of the encoded package.
	// Elements are densely and contiguously packed together.
	//
	// The last 8 bytes of elemData are the package fingerprint.
	elemData string

	// elemEnds stores the byte-offset end positions of element
	// bitstreams within elemData.
	//
	// For example, element I's bitstream data starts at elemEnds[I-1]
	// (or 0, if I==0) and ends at elemEnds[I].
	//
	// Note: elemEnds is indexed by absolute indices, not
	// section-relative indices.
	elemEnds []uint32

	// elemEndsEnds stores the index-offset end positions of relocation
	// sections within elemEnds.
	//
	// For example, section K's end positions start at elemEndsEnds[K-1]
	// (or 0, if K==0) and end at elemEndsEnds[K].
	elemEndsEnds [numRelocs]uint32

	// scratchRelocEnt is a spare relocation slice. RetireDecoder stores
	// a retired Decoder's Relocs here, and TempDecoderRaw takes it back
	// (leaving this field nil) when its capacity is large enough.
	scratchRelocEnt []RelocEnt
}
  50. // PkgPath returns the package path for the package
  51. //
  52. // TODO(mdempsky): Remove; unneeded since CL 391014.
  53. func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath }
  54. // SyncMarkers reports whether pr uses sync markers.
  55. func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync }
  56. // NewPkgDecoder returns a PkgDecoder initialized to read the Unified
  57. // IR export data from input. pkgPath is the package path for the
  58. // compilation unit that produced the export data.
  59. func NewPkgDecoder(pkgPath, input string) PkgDecoder {
  60. pr := PkgDecoder{
  61. pkgPath: pkgPath,
  62. }
  63. // TODO(mdempsky): Implement direct indexing of input string to
  64. // avoid copying the position information.
  65. r := strings.NewReader(input)
  66. var ver uint32
  67. assert(binary.Read(r, binary.LittleEndian, &ver) == nil)
  68. pr.version = Version(ver)
  69. if pr.version >= numVersions {
  70. panic(fmt.Errorf("cannot decode %q, export data version %d is greater than maximum supported version %d", pkgPath, pr.version, numVersions-1))
  71. }
  72. if pr.version.Has(Flags) {
  73. var flags uint32
  74. assert(binary.Read(r, binary.LittleEndian, &flags) == nil)
  75. pr.sync = flags&flagSyncMarkers != 0
  76. }
  77. assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil)
  78. pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1])
  79. assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil)
  80. pos, err := r.Seek(0, io.SeekCurrent)
  81. assert(err == nil)
  82. pr.elemData = input[pos:]
  83. const fingerprintSize = 8
  84. assert(len(pr.elemData)-fingerprintSize == int(pr.elemEnds[len(pr.elemEnds)-1]))
  85. return pr
  86. }
  87. // NumElems returns the number of elements in section k.
  88. func (pr *PkgDecoder) NumElems(k RelocKind) int {
  89. count := int(pr.elemEndsEnds[k])
  90. if k > 0 {
  91. count -= int(pr.elemEndsEnds[k-1])
  92. }
  93. return count
  94. }
  95. // TotalElems returns the total number of elements across all sections.
  96. func (pr *PkgDecoder) TotalElems() int {
  97. return len(pr.elemEnds)
  98. }
  99. // Fingerprint returns the package fingerprint.
  100. func (pr *PkgDecoder) Fingerprint() [8]byte {
  101. var fp [8]byte
  102. copy(fp[:], pr.elemData[len(pr.elemData)-8:])
  103. return fp
  104. }
  105. // AbsIdx returns the absolute index for the given (section, index)
  106. // pair.
  107. func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int {
  108. absIdx := int(idx)
  109. if k > 0 {
  110. absIdx += int(pr.elemEndsEnds[k-1])
  111. }
  112. if absIdx >= int(pr.elemEndsEnds[k]) {
  113. panicf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds)
  114. }
  115. return absIdx
  116. }
  117. // DataIdx returns the raw element bitstream for the given (section,
  118. // index) pair.
  119. func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string {
  120. absIdx := pr.AbsIdx(k, idx)
  121. var start uint32
  122. if absIdx > 0 {
  123. start = pr.elemEnds[absIdx-1]
  124. }
  125. end := pr.elemEnds[absIdx]
  126. return pr.elemData[start:end]
  127. }
  128. // StringIdx returns the string value for the given string index.
  129. func (pr *PkgDecoder) StringIdx(idx Index) string {
  130. return pr.DataIdx(RelocString, idx)
  131. }
  132. // NewDecoder returns a Decoder for the given (section, index) pair,
  133. // and decodes the given SyncMarker from the element bitstream.
  134. func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
  135. r := pr.NewDecoderRaw(k, idx)
  136. r.Sync(marker)
  137. return r
  138. }
  139. // TempDecoder returns a Decoder for the given (section, index) pair,
  140. // and decodes the given SyncMarker from the element bitstream.
  141. // If possible the Decoder should be RetireDecoder'd when it is no longer
  142. // needed, this will avoid heap allocations.
  143. func (pr *PkgDecoder) TempDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
  144. r := pr.TempDecoderRaw(k, idx)
  145. r.Sync(marker)
  146. return r
  147. }
  148. func (pr *PkgDecoder) RetireDecoder(d *Decoder) {
  149. pr.scratchRelocEnt = d.Relocs
  150. d.Relocs = nil
  151. }
  152. // NewDecoderRaw returns a Decoder for the given (section, index) pair.
  153. //
  154. // Most callers should use NewDecoder instead.
  155. func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder {
  156. r := Decoder{
  157. common: pr,
  158. k: k,
  159. Idx: idx,
  160. }
  161. r.Data.Reset(pr.DataIdx(k, idx))
  162. r.Sync(SyncRelocs)
  163. r.Relocs = make([]RelocEnt, r.Len())
  164. for i := range r.Relocs {
  165. r.Sync(SyncReloc)
  166. r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
  167. }
  168. return r
  169. }
  170. func (pr *PkgDecoder) TempDecoderRaw(k RelocKind, idx Index) Decoder {
  171. r := Decoder{
  172. common: pr,
  173. k: k,
  174. Idx: idx,
  175. }
  176. r.Data.Reset(pr.DataIdx(k, idx))
  177. r.Sync(SyncRelocs)
  178. l := r.Len()
  179. if cap(pr.scratchRelocEnt) >= l {
  180. r.Relocs = pr.scratchRelocEnt[:l]
  181. pr.scratchRelocEnt = nil
  182. } else {
  183. r.Relocs = make([]RelocEnt, l)
  184. }
  185. for i := range r.Relocs {
  186. r.Sync(SyncReloc)
  187. r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
  188. }
  189. return r
  190. }
// A Decoder provides methods for decoding an individual element's
// bitstream data.
type Decoder struct {
	// common is the PkgDecoder this Decoder was created from; it
	// supplies the format version, the sync setting, and string lookups.
	common *PkgDecoder

	// Relocs is the element's relocation table, read from the start of
	// the bitstream when the Decoder is created.
	Relocs []RelocEnt

	// Data reads from the element's raw bitstream.
	Data strings.Reader

	// k and Idx are the (section, index) pair this Decoder was created
	// for.
	k   RelocKind
	Idx Index
}
  200. func (r *Decoder) checkErr(err error) {
  201. if err != nil {
  202. panicf("unexpected decoding error: %w", err)
  203. }
  204. }
  205. func (r *Decoder) rawUvarint() uint64 {
  206. x, err := readUvarint(&r.Data)
  207. r.checkErr(err)
  208. return x
  209. }
  210. // readUvarint is a type-specialized copy of encoding/binary.ReadUvarint.
  211. // This avoids the interface conversion and thus has better escape properties,
  212. // which flows up the stack.
  213. func readUvarint(r *strings.Reader) (uint64, error) {
  214. var x uint64
  215. var s uint
  216. for i := range binary.MaxVarintLen64 {
  217. b, err := r.ReadByte()
  218. if err != nil {
  219. if i > 0 && err == io.EOF {
  220. err = io.ErrUnexpectedEOF
  221. }
  222. return x, err
  223. }
  224. if b < 0x80 {
  225. if i == binary.MaxVarintLen64-1 && b > 1 {
  226. return x, overflow
  227. }
  228. return x | uint64(b)<<s, nil
  229. }
  230. x |= uint64(b&0x7f) << s
  231. s += 7
  232. }
  233. return x, overflow
  234. }
  235. var overflow = errors.New("pkgbits: readUvarint overflows a 64-bit integer")
  236. func (r *Decoder) rawVarint() int64 {
  237. ux := r.rawUvarint()
  238. // Zig-zag decode.
  239. x := int64(ux >> 1)
  240. if ux&1 != 0 {
  241. x = ^x
  242. }
  243. return x
  244. }
  245. func (r *Decoder) rawReloc(k RelocKind, idx int) Index {
  246. e := r.Relocs[idx]
  247. assert(e.Kind == k)
  248. return e.Idx
  249. }
  250. // Sync decodes a sync marker from the element bitstream and asserts
  251. // that it matches the expected marker.
  252. //
  253. // If r.common.sync is false, then Sync is a no-op.
  254. func (r *Decoder) Sync(mWant SyncMarker) {
  255. if !r.common.sync {
  256. return
  257. }
  258. pos, _ := r.Data.Seek(0, io.SeekCurrent)
  259. mHave := SyncMarker(r.rawUvarint())
  260. writerPCs := make([]int, r.rawUvarint())
  261. for i := range writerPCs {
  262. writerPCs[i] = int(r.rawUvarint())
  263. }
  264. if mHave == mWant {
  265. return
  266. }
  267. // There's some tension here between printing:
  268. //
  269. // (1) full file paths that tools can recognize (e.g., so emacs
  270. // hyperlinks the "file:line" text for easy navigation), or
  271. //
  272. // (2) short file paths that are easier for humans to read (e.g., by
  273. // omitting redundant or irrelevant details, so it's easier to
  274. // focus on the useful bits that remain).
  275. //
  276. // The current formatting favors the former, as it seems more
  277. // helpful in practice. But perhaps the formatting could be improved
  278. // to better address both concerns. For example, use relative file
  279. // paths if they would be shorter, or rewrite file paths to contain
  280. // "$GOROOT" (like objabi.AbsFile does) if tools can be taught how
  281. // to reliably expand that again.
  282. fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos)
  283. fmt.Printf("\nfound %v, written at:\n", mHave)
  284. if len(writerPCs) == 0 {
  285. fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath)
  286. }
  287. for _, pc := range writerPCs {
  288. fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc)))
  289. }
  290. fmt.Printf("\nexpected %v, reading at:\n", mWant)
  291. var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size?
  292. n := runtime.Callers(2, readerPCs[:])
  293. for _, pc := range fmtFrames(readerPCs[:n]...) {
  294. fmt.Printf("\t%s\n", pc)
  295. }
  296. // We already printed a stack trace for the reader, so now we can
  297. // simply exit. Printing a second one with panic or base.Fatalf
  298. // would just be noise.
  299. os.Exit(1)
  300. }
  301. // Bool decodes and returns a bool value from the element bitstream.
  302. func (r *Decoder) Bool() bool {
  303. r.Sync(SyncBool)
  304. x, err := r.Data.ReadByte()
  305. r.checkErr(err)
  306. assert(x < 2)
  307. return x != 0
  308. }
  309. // Int64 decodes and returns an int64 value from the element bitstream.
  310. func (r *Decoder) Int64() int64 {
  311. r.Sync(SyncInt64)
  312. return r.rawVarint()
  313. }
  314. // Uint64 decodes and returns a uint64 value from the element bitstream.
  315. func (r *Decoder) Uint64() uint64 {
  316. r.Sync(SyncUint64)
  317. return r.rawUvarint()
  318. }
  319. // Len decodes and returns a non-negative int value from the element bitstream.
  320. func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v }
  321. // Int decodes and returns an int value from the element bitstream.
  322. func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v }
  323. // Uint decodes and returns a uint value from the element bitstream.
  324. func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v }
  325. // Code decodes a Code value from the element bitstream and returns
  326. // its ordinal value. It's the caller's responsibility to convert the
  327. // result to an appropriate Code type.
  328. //
  329. // TODO(mdempsky): Ideally this method would have signature "Code[T
  330. // Code] T" instead, but we don't allow generic methods and the
  331. // compiler can't depend on generics yet anyway.
  332. func (r *Decoder) Code(mark SyncMarker) int {
  333. r.Sync(mark)
  334. return r.Len()
  335. }
  336. // Reloc decodes a relocation of expected section k from the element
  337. // bitstream and returns an index to the referenced element.
  338. func (r *Decoder) Reloc(k RelocKind) Index {
  339. r.Sync(SyncUseReloc)
  340. return r.rawReloc(k, r.Len())
  341. }
  342. // String decodes and returns a string value from the element
  343. // bitstream.
  344. func (r *Decoder) String() string {
  345. r.Sync(SyncString)
  346. return r.common.StringIdx(r.Reloc(RelocString))
  347. }
  348. // Strings decodes and returns a variable-length slice of strings from
  349. // the element bitstream.
  350. func (r *Decoder) Strings() []string {
  351. res := make([]string, r.Len())
  352. for i := range res {
  353. res[i] = r.String()
  354. }
  355. return res
  356. }
  357. // Value decodes and returns a constant.Value from the element
  358. // bitstream.
  359. func (r *Decoder) Value() constant.Value {
  360. r.Sync(SyncValue)
  361. isComplex := r.Bool()
  362. val := r.scalar()
  363. if isComplex {
  364. val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar()))
  365. }
  366. return val
  367. }
  368. func (r *Decoder) scalar() constant.Value {
  369. switch tag := CodeVal(r.Code(SyncVal)); tag {
  370. default:
  371. panic(fmt.Errorf("unexpected scalar tag: %v", tag))
  372. case ValBool:
  373. return constant.MakeBool(r.Bool())
  374. case ValString:
  375. return constant.MakeString(r.String())
  376. case ValInt64:
  377. return constant.MakeInt64(r.Int64())
  378. case ValBigInt:
  379. return constant.Make(r.bigInt())
  380. case ValBigRat:
  381. num := r.bigInt()
  382. denom := r.bigInt()
  383. return constant.Make(new(big.Rat).SetFrac(num, denom))
  384. case ValBigFloat:
  385. return constant.Make(r.bigFloat())
  386. }
  387. }
  388. func (r *Decoder) bigInt() *big.Int {
  389. v := new(big.Int).SetBytes([]byte(r.String()))
  390. if r.Bool() {
  391. v.Neg(v)
  392. }
  393. return v
  394. }
  395. func (r *Decoder) bigFloat() *big.Float {
  396. v := new(big.Float).SetPrec(512)
  397. assert(v.UnmarshalText([]byte(r.String())) == nil)
  398. return v
  399. }
  400. // @@@ Helpers
  401. // TODO(mdempsky): These should probably be removed. I think they're a
  402. // smell that the export data format is not yet quite right.
  403. // PeekPkgPath returns the package path for the specified package
  404. // index.
  405. func (pr *PkgDecoder) PeekPkgPath(idx Index) string {
  406. var path string
  407. {
  408. r := pr.TempDecoder(RelocPkg, idx, SyncPkgDef)
  409. path = r.String()
  410. pr.RetireDecoder(&r)
  411. }
  412. if path == "" {
  413. path = pr.pkgPath
  414. }
  415. return path
  416. }
  417. // PeekObj returns the package path, object name, and CodeObj for the
  418. // specified object index.
  419. func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) {
  420. var ridx Index
  421. var name string
  422. var rcode int
  423. {
  424. r := pr.TempDecoder(RelocName, idx, SyncObject1)
  425. r.Sync(SyncSym)
  426. r.Sync(SyncPkg)
  427. ridx = r.Reloc(RelocPkg)
  428. name = r.String()
  429. rcode = r.Code(SyncCodeObj)
  430. pr.RetireDecoder(&r)
  431. }
  432. path := pr.PeekPkgPath(ridx)
  433. assert(name != "")
  434. tag := CodeObj(rcode)
  435. return path, name, tag
  436. }
  437. // Version reports the version of the bitstream.
  438. func (w *Decoder) Version() Version { return w.common.version }