stream.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519
  1. // Copyright 2010 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package json
  5. import (
  6. "bytes"
  7. "io"
  8. )
// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
	r       io.Reader   // underlying input stream
	buf     []byte      // data read from r; refill discards the consumed prefix
	d       decodeState // decode state, re-initialized by Decode for each value
	scanp   int         // start of unread data in buf
	scanned int64       // amount of data already scanned
	scan    scanner     // scanner used by readValue to find the end of a value
	err     error       // sticky error recorded by readValue; returned by later Decode calls

	tokenState int   // current token-streaming state (one of the token* constants)
	tokenStack []int // states saved by Token on '[' or '{' and restored on ']' or '}'
}
  21. // NewDecoder returns a new decoder that reads from r.
  22. //
  23. // The decoder introduces its own buffering and may
  24. // read data from r beyond the JSON values requested.
  25. func NewDecoder(r io.Reader) *Decoder {
  26. return &Decoder{r: r}
  27. }
  28. // UseNumber causes the Decoder to unmarshal a number into an interface{} as a
  29. // Number instead of as a float64.
  30. func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
  31. // DisallowUnknownFields causes the Decoder to return an error when the destination
  32. // is a struct and the input contains object keys which do not match any
  33. // non-ignored, exported fields in the destination.
  34. func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true }
  35. // Decode reads the next JSON-encoded value from its
  36. // input and stores it in the value pointed to by v.
  37. //
  38. // See the documentation for Unmarshal for details about
  39. // the conversion of JSON into a Go value.
  40. func (dec *Decoder) Decode(v any) error {
  41. if dec.err != nil {
  42. return dec.err
  43. }
  44. if err := dec.tokenPrepareForDecode(); err != nil {
  45. return err
  46. }
  47. if !dec.tokenValueAllowed() {
  48. return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()}
  49. }
  50. // Read whole value into buffer.
  51. n, err := dec.readValue()
  52. if err != nil {
  53. return err
  54. }
  55. dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
  56. dec.scanp += n
  57. // Don't save err from unmarshal into dec.err:
  58. // the connection is still usable since we read a complete JSON
  59. // object from it before the error happened.
  60. err = dec.d.unmarshal(v)
  61. // fixup token streaming state
  62. dec.tokenValueEnd()
  63. return err
  64. }
  65. // Buffered returns a reader of the data remaining in the Decoder's
  66. // buffer. The reader is valid until the next call to Decode.
  67. func (dec *Decoder) Buffered() io.Reader {
  68. return bytes.NewReader(dec.buf[dec.scanp:])
  69. }
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
//
// Scanning starts at dec.scanp and feeds bytes to dec.scan one at a time,
// calling refill whenever the buffered data runs out. dec.scanp itself is
// not advanced here; the caller consumes the returned number of bytes.
// A scanner error or read error is stored in dec.err before being returned.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	scanp := dec.scanp
	var err error
Input:
	// help the compiler see that scanp is never negative, so it can remove
	// some bounds checks below.
	for scanp >= 0 {

		// Look in the buffer for a new value.
		for ; scanp < len(dec.buf); scanp++ {
			c := dec.buf[scanp]
			dec.scan.bytes++
			switch dec.scan.step(&dec.scan, c) {
			case scanEnd:
				// scanEnd is delayed one byte so we decrement
				// the scanner bytes count by 1 to ensure that
				// this value is correct in the next call of Decode.
				dec.scan.bytes--
				break Input
			case scanEndObject, scanEndArray:
				// scanEnd is delayed one byte.
				// We might block trying to get that byte from src,
				// so instead invent a space byte.
				if stateEndValue(&dec.scan, ' ') == scanEnd {
					// Include the closing delimiter in the value.
					scanp++
					break Input
				}
			case scanError:
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// Feed a synthetic space: if that completes the value,
				// the input simply ended right after it.
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				// EOF with non-space data buffered is reported
				// as an unexpected EOF instead.
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// refill may slide the buffer and reset dec.scanp, so carry the
		// scan position across the call as an offset from dec.scanp.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}
  124. func (dec *Decoder) refill() error {
  125. // Make room to read more into the buffer.
  126. // First slide down data already consumed.
  127. if dec.scanp > 0 {
  128. dec.scanned += int64(dec.scanp)
  129. n := copy(dec.buf, dec.buf[dec.scanp:])
  130. dec.buf = dec.buf[:n]
  131. dec.scanp = 0
  132. }
  133. // Grow buffer if not large enough.
  134. const minRead = 512
  135. if cap(dec.buf)-len(dec.buf) < minRead {
  136. newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
  137. copy(newBuf, dec.buf)
  138. dec.buf = newBuf
  139. }
  140. // Read. Delay error for next iteration (after scan).
  141. n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
  142. dec.buf = dec.buf[0 : len(dec.buf)+n]
  143. return err
  144. }
  145. func nonSpace(b []byte) bool {
  146. for _, c := range b {
  147. if !isSpace(c) {
  148. return true
  149. }
  150. }
  151. return false
  152. }
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	w          io.Writer // destination for encoded output
	err        error     // sticky error from a failed write; returned by later Encode calls
	escapeHTML bool      // passed to marshal as encOpts.escapeHTML; set to true by NewEncoder

	indentBuf    *bytes.Buffer // scratch buffer for indented output, allocated by Encode on first use
	indentPrefix string        // prefix argument passed to Indent
	indentValue  string        // indent argument passed to Indent
}
  162. // NewEncoder returns a new encoder that writes to w.
  163. func NewEncoder(w io.Writer) *Encoder {
  164. return &Encoder{w: w, escapeHTML: true}
  165. }
  166. // Encode writes the JSON encoding of v to the stream,
  167. // followed by a newline character.
  168. //
  169. // See the documentation for Marshal for details about the
  170. // conversion of Go values to JSON.
  171. func (enc *Encoder) Encode(v any) error {
  172. if enc.err != nil {
  173. return enc.err
  174. }
  175. e := newEncodeState()
  176. err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
  177. if err != nil {
  178. return err
  179. }
  180. // Terminate each value with a newline.
  181. // This makes the output look a little nicer
  182. // when debugging, and some kind of space
  183. // is required if the encoded value was a number,
  184. // so that the reader knows there aren't more
  185. // digits coming.
  186. e.WriteByte('\n')
  187. b := e.Bytes()
  188. if enc.indentPrefix != "" || enc.indentValue != "" {
  189. if enc.indentBuf == nil {
  190. enc.indentBuf = new(bytes.Buffer)
  191. }
  192. enc.indentBuf.Reset()
  193. err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
  194. if err != nil {
  195. return err
  196. }
  197. b = enc.indentBuf.Bytes()
  198. }
  199. if _, err = enc.w.Write(b); err != nil {
  200. enc.err = err
  201. }
  202. encodeStatePool.Put(e)
  203. return err
  204. }
  205. // SetIndent instructs the encoder to format each subsequent encoded
  206. // value as if indented by the package-level function Indent(dst, src, prefix, indent).
  207. // Calling SetIndent("", "") disables indentation.
  208. func (enc *Encoder) SetIndent(prefix, indent string) {
  209. enc.indentPrefix = prefix
  210. enc.indentValue = indent
  211. }
  212. // SetEscapeHTML specifies whether problematic HTML characters
  213. // should be escaped inside JSON quoted strings.
  214. // The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
  215. // to avoid certain safety problems that can arise when embedding JSON in HTML.
  216. //
  217. // In non-HTML settings where the escaping interferes with the readability
  218. // of the output, SetEscapeHTML(false) disables this behavior.
  219. func (enc *Encoder) SetEscapeHTML(on bool) {
  220. enc.escapeHTML = on
  221. }
  222. /*
  223. // RawMessage is a raw encoded JSON value.
  224. // It implements Marshaler and Unmarshaler and can
  225. // be used to delay JSON decoding or precompute a JSON encoding.
  226. type RawMessage []byte
  227. // MarshalJSON returns m as the JSON encoding of m.
  228. func (m RawMessage) MarshalJSON() ([]byte, error) {
  229. if m == nil {
  230. return []byte("null"), nil
  231. }
  232. return m, nil
  233. }
  234. // UnmarshalJSON sets *m to a copy of data.
  235. func (m *RawMessage) UnmarshalJSON(data []byte) error {
  236. if m == nil {
  237. return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
  238. }
  239. *m = append((*m)[0:0], data...)
  240. return nil
  241. }
  242. */
  243. var _ Marshaler = (*RawMessage)(nil)
  244. var _ Unmarshaler = (*RawMessage)(nil)
  245. /*
  246. // A Token holds a value of one of these types:
  247. //
  248. // Delim, for the four JSON delimiters [ ] { }
  249. // bool, for JSON booleans
  250. // float64, for JSON numbers
  251. // Number, for JSON numbers
  252. // string, for JSON string literals
  253. // nil, for JSON null
  254. //
  255. type Token any
  256. */
// Token-streaming states stored in Decoder.tokenState.
const (
	tokenTopValue    = iota // not inside any array or object; also the zero value of tokenState
	tokenArrayStart         // just after '[': a value or ']' may follow
	tokenArrayValue         // just after ',' in an array: a value must follow
	tokenArrayComma         // just after an array element: ',' or ']' may follow
	tokenObjectStart        // just after '{': a key string or '}' may follow
	tokenObjectKey          // just after ',' in an object: a key string must follow
	tokenObjectColon        // just after an object key: ':' must follow
	tokenObjectValue        // just after ':': a value must follow
	tokenObjectComma        // just after an object value: ',' or '}' may follow
)
  268. // advance tokenstate from a separator state to a value state
  269. func (dec *Decoder) tokenPrepareForDecode() error {
  270. // Note: Not calling peek before switch, to avoid
  271. // putting peek into the standard Decode path.
  272. // peek is only called when using the Token API.
  273. switch dec.tokenState {
  274. case tokenArrayComma:
  275. c, err := dec.peek()
  276. if err != nil {
  277. return err
  278. }
  279. if c != ',' {
  280. return &SyntaxError{"expected comma after array element", dec.InputOffset()}
  281. }
  282. dec.scanp++
  283. dec.tokenState = tokenArrayValue
  284. case tokenObjectColon:
  285. c, err := dec.peek()
  286. if err != nil {
  287. return err
  288. }
  289. if c != ':' {
  290. return &SyntaxError{"expected colon after object key", dec.InputOffset()}
  291. }
  292. dec.scanp++
  293. dec.tokenState = tokenObjectValue
  294. }
  295. return nil
  296. }
  297. func (dec *Decoder) tokenValueAllowed() bool {
  298. switch dec.tokenState {
  299. case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
  300. return true
  301. }
  302. return false
  303. }
  304. func (dec *Decoder) tokenValueEnd() {
  305. switch dec.tokenState {
  306. case tokenArrayStart, tokenArrayValue:
  307. dec.tokenState = tokenArrayComma
  308. case tokenObjectValue:
  309. dec.tokenState = tokenObjectComma
  310. }
  311. }
  312. /*
  313. // A Delim is a JSON array or object delimiter, one of [ ] { or }.
  314. type Delim rune
  315. func (d Delim) String() string {
  316. return string(d)
  317. }
  318. */
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, io.EOF.
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type Delim
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	// Loop so that separators (':' and ',') can be consumed and skipped
	// with continue instead of being returned as tokens.
	for {
		c, err := dec.peek()
		if err != nil {
			return nil, err
		}
		switch c {
		case '[':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching ']' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenArrayStart
			return Delim('['), nil

		case ']':
			// Only valid right after '[' or after an array element.
			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state; the array as a whole counts as one value there.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim(']'), nil

		case '{':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching '}' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenObjectStart
			return Delim('{'), nil

		case '}':
			// Only valid right after '{' or after an object value.
			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state; the object as a whole counts as one value there.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim('}'), nil

		case ':':
			if dec.tokenState != tokenObjectColon {
				return dec.tokenError(c)
			}
			dec.scanp++
			dec.tokenState = tokenObjectValue
			continue

		case ',':
			if dec.tokenState == tokenArrayComma {
				dec.scanp++
				dec.tokenState = tokenArrayValue
				continue
			}
			if dec.tokenState == tokenObjectComma {
				dec.scanp++
				dec.tokenState = tokenObjectKey
				continue
			}
			return dec.tokenError(c)

		case '"':
			// Where an object key is expected, decode the string as the key.
			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
				var x string
				// Decode rejects these states (tokenValueAllowed is false),
				// so temporarily present a top-level value state and
				// restore the real state afterwards.
				old := dec.tokenState
				dec.tokenState = tokenTopValue
				err := dec.Decode(&x)
				dec.tokenState = old
				if err != nil {
					return nil, err
				}
				dec.tokenState = tokenObjectColon
				return x, nil
			}
			// Otherwise the string is an ordinary value.
			fallthrough

		default:
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			// Decode also advances the token state via tokenValueEnd.
			var x any
			if err := dec.Decode(&x); err != nil {
				return nil, err
			}
			return x, nil
		}
	}
}
  416. func (dec *Decoder) tokenError(c byte) (Token, error) {
  417. var context string
  418. switch dec.tokenState {
  419. case tokenTopValue:
  420. context = " looking for beginning of value"
  421. case tokenArrayStart, tokenArrayValue, tokenObjectValue:
  422. context = " looking for beginning of value"
  423. case tokenArrayComma:
  424. context = " after array element"
  425. case tokenObjectKey:
  426. context = " looking for beginning of object key string"
  427. case tokenObjectColon:
  428. context = " after object key"
  429. case tokenObjectComma:
  430. context = " after object key:value pair"
  431. }
  432. return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()}
  433. }
  434. // More reports whether there is another element in the
  435. // current array or object being parsed.
  436. func (dec *Decoder) More() bool {
  437. c, err := dec.peek()
  438. return err == nil && c != ']' && c != '}'
  439. }
  440. func (dec *Decoder) peek() (byte, error) {
  441. var err error
  442. for {
  443. for i := dec.scanp; i < len(dec.buf); i++ {
  444. c := dec.buf[i]
  445. if isSpace(c) {
  446. continue
  447. }
  448. dec.scanp = i
  449. return c, nil
  450. }
  451. // buffer has been scanned, now report any error
  452. if err != nil {
  453. return 0, err
  454. }
  455. err = dec.refill()
  456. }
  457. }
  458. // InputOffset returns the input stream byte offset of the current decoder position.
  459. // The offset gives the location of the end of the most recently returned token
  460. // and the beginning of the next token.
  461. func (dec *Decoder) InputOffset() int64 {
  462. return dec.scanned + int64(dec.scanp)
  463. }