stream.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572
  1. // Copyright 2010 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package json
  5. import (
  6. "bytes"
  7. "errors"
  8. "io"
  9. )
// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
	r       io.Reader   // source of JSON data
	buf     []byte      // buffered input; unread data begins at buf[scanp]
	d       decodeState // reused decode state for each Decode call
	scanp   int         // start of unread data in buf
	scanned int64       // amount of data already scanned
	scan    scanner     // state machine used to find value boundaries
	err     error       // sticky read/scan error; once set, the stream is unusable

	// Token-streaming state: the current parse state and the stack of
	// enclosing composite states (see the token* constants below).
	tokenState int
	tokenStack []int
}
  22. // NewDecoder returns a new decoder that reads from r.
  23. //
  24. // The decoder introduces its own buffering and may
  25. // read data from r beyond the JSON values requested.
  26. func NewDecoder(r io.Reader) *Decoder {
  27. return &Decoder{r: r}
  28. }
  29. // UseNumber causes the Decoder to unmarshal a number into an interface{} as a
  30. // Number instead of as a float64.
  31. func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
  32. // DisallowUnknownFields causes the Decoder to return an error when the destination
  33. // is a struct and the input contains object keys which do not match any
  34. // non-ignored, exported fields in the destination.
  35. func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true }
  36. // SetDiscriminator tells the decoder to check if JSON objects include a
  37. // discriminator that specifies the Go type into which the object should be
  38. // decoded.
  39. // Map and struct values are encoded as JSON objects as normal, but with an
  40. // additional field (typeFieldName) that specifies the object's Go type.
  41. // All other values are encoded inside an outer JSON object with a field
  42. // (typeFieldName) that specifies the value's Go type and a field
  43. // (valueFieldName) that specifies the actual value.
  44. // An optional typeFn may be provided to enable looking up custom types based
  45. // on type name strings. Built-in types are handled automatically and will be
  46. // ignored if they are returned by the typeFn.
  47. // Calling SetDiscriminator("", "", nil) disables the discriminator.
  48. func (dec *Decoder) SetDiscriminator(typeFieldName, valueFieldName string, typeFn DiscriminatorToTypeFunc) {
  49. dec.d.discriminatorTypeFieldName = typeFieldName
  50. dec.d.discriminatorValueFieldName = valueFieldName
  51. dec.d.discriminatorToTypeFn = typeFn
  52. }
// Decode reads the next JSON-encoded value from its
// input and stores it in the value pointed to by v.
//
// See the documentation for Unmarshal for details about
// the conversion of JSON into a Go value.
func (dec *Decoder) Decode(v interface{}) error {
	// A previous read or scan error is sticky: the stream is unusable.
	if dec.err != nil {
		return dec.err
	}

	// Consume any pending ',' or ':' left over from the Token API.
	if err := dec.tokenPrepareForDecode(); err != nil {
		return err
	}

	if !dec.tokenValueAllowed() {
		return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()}
	}

	// Read whole value into buffer.
	n, err := dec.readValue()
	if err != nil {
		return err
	}
	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
	dec.scanp += n

	// Don't save err from unmarshal into dec.err:
	// the connection is still usable since we read a complete JSON
	// object from it before the error happened.
	err = dec.d.unmarshal(v)

	// fixup token streaming state
	dec.tokenValueEnd()

	return err
}
  83. // Buffered returns a reader of the data remaining in the Decoder's
  84. // buffer. The reader is valid until the next call to Decode.
  85. func (dec *Decoder) Buffered() io.Reader {
  86. return bytes.NewReader(dec.buf[dec.scanp:])
  87. }
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	scanp := dec.scanp
	var err error
Input:
	// help the compiler see that scanp is never negative, so it can remove
	// some bounds checks below.
	for scanp >= 0 {

		// Look in the buffer for a new value.
		for ; scanp < len(dec.buf); scanp++ {
			c := dec.buf[scanp]
			dec.scan.bytes++
			switch dec.scan.step(&dec.scan, c) {
			case scanEnd:
				// scanEnd is delayed one byte so we decrement
				// the scanner bytes count by 1 to ensure that
				// this value is correct in the next call of Decode.
				dec.scan.bytes--
				break Input
			case scanEndObject, scanEndArray:
				// scanEnd is delayed one byte.
				// We might block trying to get that byte from src,
				// so instead invent a space byte.
				if stateEndValue(&dec.scan, ' ') == scanEnd {
					scanp++
					break Input
				}
			case scanError:
				// Scan errors are sticky; remember them on the decoder.
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// A clean EOF is fine if the scanner already holds a
				// complete value (again probed with an invented space).
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				// EOF mid-value is truncation; EOF on pure whitespace
				// is reported as plain io.EOF.
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// Refill the buffer and resume scanning where we left off.
		// refill may slide the buffer, so recompute scanp relative to
		// the (possibly reset) dec.scanp.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}
// refill reads more data from the underlying reader into dec.buf.
// Already-consumed bytes are discarded first and the buffer is grown
// if needed. Any read error is returned (not stored) so the caller can
// scan the buffered data before reporting it.
func (dec *Decoder) refill() error {
	// Make room to read more into the buffer.
	// First slide down data already consumed.
	if dec.scanp > 0 {
		// Track the total bytes dropped so InputOffset stays correct.
		dec.scanned += int64(dec.scanp)
		n := copy(dec.buf, dec.buf[dec.scanp:])
		dec.buf = dec.buf[:n]
		dec.scanp = 0
	}

	// Grow buffer if not large enough.
	const minRead = 512
	if cap(dec.buf)-len(dec.buf) < minRead {
		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
		copy(newBuf, dec.buf)
		dec.buf = newBuf
	}

	// Read. Delay error for next iteration (after scan).
	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
	dec.buf = dec.buf[0 : len(dec.buf)+n]

	return err
}
  163. func nonSpace(b []byte) bool {
  164. for _, c := range b {
  165. if !isSpace(c) {
  166. return true
  167. }
  168. }
  169. return false
  170. }
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	w          io.Writer
	err        error // sticky write error; once set, Encode keeps returning it
	escapeHTML bool  // escape &, <, > inside strings (default true; see SetEscapeHTML)

	indentBuf    *bytes.Buffer // lazily allocated scratch buffer for indented output
	indentPrefix string        // see SetIndent
	indentValue  string        // see SetIndent

	// Discriminator settings; see SetDiscriminator and
	// SetTypeToDiscriminatorFunc.
	discriminatorTypeFieldName  string
	discriminatorValueFieldName string
	discriminatorEncodeMode     DiscriminatorEncodeMode
	typeToDiscriminatorFn       TypeToDiscriminatorFunc
}
  184. // NewEncoder returns a new encoder that writes to w.
  185. func NewEncoder(w io.Writer) *Encoder {
  186. return &Encoder{w: w, escapeHTML: true}
  187. }
  188. // Encode writes the JSON encoding of v to the stream,
  189. // followed by a newline character.
  190. //
  191. // See the documentation for Marshal for details about the
  192. // conversion of Go values to JSON.
  193. func (enc *Encoder) Encode(v interface{}) error {
  194. if enc.err != nil {
  195. return enc.err
  196. }
  197. e := newEncodeState()
  198. err := e.marshal(v, encOpts{
  199. escapeHTML: enc.escapeHTML,
  200. discriminatorTypeFieldName: enc.discriminatorTypeFieldName,
  201. discriminatorValueFieldName: enc.discriminatorValueFieldName,
  202. discriminatorEncodeMode: enc.discriminatorEncodeMode,
  203. discriminatorValueFn: enc.typeToDiscriminatorFn,
  204. })
  205. if err != nil {
  206. return err
  207. }
  208. // Terminate each value with a newline.
  209. // This makes the output look a little nicer
  210. // when debugging, and some kind of space
  211. // is required if the encoded value was a number,
  212. // so that the reader knows there aren't more
  213. // digits coming.
  214. e.WriteByte('\n')
  215. b := e.Bytes()
  216. if enc.indentPrefix != "" || enc.indentValue != "" {
  217. if enc.indentBuf == nil {
  218. enc.indentBuf = new(bytes.Buffer)
  219. }
  220. enc.indentBuf.Reset()
  221. err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
  222. if err != nil {
  223. return err
  224. }
  225. b = enc.indentBuf.Bytes()
  226. }
  227. if _, err = enc.w.Write(b); err != nil {
  228. enc.err = err
  229. }
  230. encodeStatePool.Put(e)
  231. return err
  232. }
  233. // SetIndent instructs the encoder to format each subsequent encoded
  234. // value as if indented by the package-level function Indent(dst, src, prefix, indent).
  235. // Calling SetIndent("", "") disables indentation.
  236. func (enc *Encoder) SetIndent(prefix, indent string) {
  237. enc.indentPrefix = prefix
  238. enc.indentValue = indent
  239. }
  240. // SetEscapeHTML specifies whether problematic HTML characters
  241. // should be escaped inside JSON quoted strings.
  242. // The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
  243. // to avoid certain safety problems that can arise when embedding JSON in HTML.
  244. //
  245. // In non-HTML settings where the escaping interferes with the readability
  246. // of the output, SetEscapeHTML(false) disables this behavior.
  247. func (enc *Encoder) SetEscapeHTML(on bool) {
  248. enc.escapeHTML = on
  249. }
  250. // SetDiscriminator specifies that a value stored in an interface should be
  251. // encoded with information about the value's Go type.
  252. // Map and struct values are encoded as JSON objects as normal, but with an
  253. // additional field (typeFieldName) that specifies the object's Go type.
  254. // All other values are encoded inside an outer JSON object with a field
  255. // (typeFieldName) that specifies the value's Go type and a field
  256. // (valueFieldName) that specifies the actual value.
  257. // A mask (mode) is available to control the encoder's behavior.
  258. // Calling SetDiscriminator("", "", 0) disables the discriminator.
  259. func (enc *Encoder) SetDiscriminator(typeFieldName, valueFieldName string, mode DiscriminatorEncodeMode) {
  260. enc.discriminatorTypeFieldName = typeFieldName
  261. enc.discriminatorValueFieldName = valueFieldName
  262. enc.discriminatorEncodeMode = mode
  263. enc.typeToDiscriminatorFn = DefaultDiscriminatorFunc
  264. }
  265. // SetTypeToDiscriminatorFunc allows for customizing the discriminator value for
  266. // different types. This may be useful if the golang struct names do not match
  267. // the desired values. One example would be if discriminator values in a
  268. // protocol require special characters or start with lowercase letter. The
  269. // TypeToDiscriminatorFunc implementation may return empty string to suppress
  270. // the rendering of discriminator for specific type(s).
  271. func (enc *Encoder) SetTypeToDiscriminatorFunc(f TypeToDiscriminatorFunc) {
  272. if f == nil {
  273. enc.typeToDiscriminatorFn = DefaultDiscriminatorFunc
  274. return
  275. }
  276. enc.typeToDiscriminatorFn = f
  277. }
  278. // RawMessage is a raw encoded JSON value.
  279. // It implements Marshaler and Unmarshaler and can
  280. // be used to delay JSON decoding or precompute a JSON encoding.
  281. type RawMessage []byte
  282. // MarshalJSON returns m as the JSON encoding of m.
  283. func (m RawMessage) MarshalJSON() ([]byte, error) {
  284. if m == nil {
  285. return []byte("null"), nil
  286. }
  287. return m, nil
  288. }
  289. // UnmarshalJSON sets *m to a copy of data.
  290. func (m *RawMessage) UnmarshalJSON(data []byte) error {
  291. if m == nil {
  292. return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
  293. }
  294. *m = append((*m)[0:0], data...)
  295. return nil
  296. }
// Compile-time checks that RawMessage implements both interfaces.
var _ Marshaler = (*RawMessage)(nil)
var _ Unmarshaler = (*RawMessage)(nil)
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers
//	string, for JSON string literals
//	nil, for JSON null
type Token interface{}
// States for the Decoder's token parser (tokenState / tokenStack).
const (
	tokenTopValue   = iota // at top level, expecting any value
	tokenArrayStart        // just read '[', expecting first element or ']'
	tokenArrayValue        // expecting an array element
	tokenArrayComma        // expecting ',' or ']' after an element
	tokenObjectStart       // just read '{', expecting first key or '}'
	tokenObjectKey         // expecting an object key string
	tokenObjectColon       // expecting ':' after a key
	tokenObjectValue       // expecting an object member value
	tokenObjectComma       // expecting ',' or '}' after a member
)
// advance tokenstate from a separator state to a value state
func (dec *Decoder) tokenPrepareForDecode() error {
	// Note: Not calling peek before switch, to avoid
	// putting peek into the standard Decode path.
	// peek is only called when using the Token API.
	switch dec.tokenState {
	case tokenArrayComma:
		// Between array elements: require and consume the ','.
		c, err := dec.peek()
		if err != nil {
			return err
		}
		if c != ',' {
			return &SyntaxError{"expected comma after array element", dec.InputOffset()}
		}
		dec.scanp++
		dec.tokenState = tokenArrayValue
	case tokenObjectColon:
		// After an object key: require and consume the ':'.
		c, err := dec.peek()
		if err != nil {
			return err
		}
		if c != ':' {
			return &SyntaxError{"expected colon after object key", dec.InputOffset()}
		}
		dec.scanp++
		dec.tokenState = tokenObjectValue
	}
	return nil
}
  348. func (dec *Decoder) tokenValueAllowed() bool {
  349. switch dec.tokenState {
  350. case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
  351. return true
  352. }
  353. return false
  354. }
  355. func (dec *Decoder) tokenValueEnd() {
  356. switch dec.tokenState {
  357. case tokenArrayStart, tokenArrayValue:
  358. dec.tokenState = tokenArrayComma
  359. case tokenObjectValue:
  360. dec.tokenState = tokenObjectComma
  361. }
  362. }
  363. // A Delim is a JSON array or object delimiter, one of [ ] { or }.
  364. type Delim rune
  365. func (d Delim) String() string {
  366. return string(d)
  367. }
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, io.EOF.
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type Delim
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	for {
		c, err := dec.peek()
		if err != nil {
			return nil, err
		}
		switch c {
		case '[':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Push the current state so the matching ']' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenArrayStart
			return Delim('['), nil
		case ']':
			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the state saved by the matching '['.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim(']'), nil
		case '{':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Push the current state so the matching '}' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenObjectStart
			return Delim('{'), nil
		case '}':
			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the state saved by the matching '{'.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim('}'), nil
		case ':':
			// Colons are consumed silently, not returned as tokens.
			if dec.tokenState != tokenObjectColon {
				return dec.tokenError(c)
			}
			dec.scanp++
			dec.tokenState = tokenObjectValue
			continue
		case ',':
			// Commas are consumed silently, not returned as tokens.
			if dec.tokenState == tokenArrayComma {
				dec.scanp++
				dec.tokenState = tokenArrayValue
				continue
			}
			if dec.tokenState == tokenObjectComma {
				dec.scanp++
				dec.tokenState = tokenObjectKey
				continue
			}
			return dec.tokenError(c)
		case '"':
			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
				// Object key: temporarily pretend we are at top level so
				// Decode will accept the bare string, then restore and
				// advance to the expect-colon state.
				var x string
				old := dec.tokenState
				dec.tokenState = tokenTopValue
				err := dec.Decode(&x)
				dec.tokenState = old
				if err != nil {
					return nil, err
				}
				dec.tokenState = tokenObjectColon
				return x, nil
			}
			// A string in value position falls through to the generic
			// value decode below.
			fallthrough
		default:
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			// Any other basic value (bool, number, null, string) is
			// decoded via the normal Decode path.
			var x interface{}
			if err := dec.Decode(&x); err != nil {
				return nil, err
			}
			return x, nil
		}
	}
}
  465. func (dec *Decoder) tokenError(c byte) (Token, error) {
  466. var context string
  467. switch dec.tokenState {
  468. case tokenTopValue:
  469. context = " looking for beginning of value"
  470. case tokenArrayStart, tokenArrayValue, tokenObjectValue:
  471. context = " looking for beginning of value"
  472. case tokenArrayComma:
  473. context = " after array element"
  474. case tokenObjectKey:
  475. context = " looking for beginning of object key string"
  476. case tokenObjectColon:
  477. context = " after object key"
  478. case tokenObjectComma:
  479. context = " after object key:value pair"
  480. }
  481. return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()}
  482. }
  483. // More reports whether there is another element in the
  484. // current array or object being parsed.
  485. func (dec *Decoder) More() bool {
  486. c, err := dec.peek()
  487. return err == nil && c != ']' && c != '}'
  488. }
// peek returns the next non-space byte in the input without consuming
// it, refilling the buffer from the underlying reader as needed.
// dec.scanp is advanced past any leading whitespace. A refill error is
// only reported after the buffered data has been fully examined.
func (dec *Decoder) peek() (byte, error) {
	var err error
	for {
		// Scan the buffered data for the first non-space byte.
		for i := dec.scanp; i < len(dec.buf); i++ {
			c := dec.buf[i]
			if isSpace(c) {
				continue
			}
			dec.scanp = i
			return c, nil
		}
		// buffer has been scanned, now report any error
		// (err holds the result of the previous refill, delayed one
		// iteration so buffered data is consumed first).
		if err != nil {
			return 0, err
		}
		err = dec.refill()
	}
}
  507. // InputOffset returns the input stream byte offset of the current decoder position.
  508. // The offset gives the location of the end of the most recently returned token
  509. // and the beginning of the next token.
  510. func (dec *Decoder) InputOffset() int64 {
  511. return dec.scanned + int64(dec.scanp)
  512. }