encoder.go 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. package xml2json
import (
	"bytes"
	"io"
	"sort"
	"unicode/utf8"
)
  7. // An Encoder writes JSON objects to an output stream.
  8. type Encoder struct {
  9. w io.Writer
  10. err error
  11. contentPrefix string
  12. attributePrefix string
  13. tc encoderTypeConverter
  14. }
  15. // NewEncoder returns a new encoder that writes to w.
  16. func NewEncoder(w io.Writer, plugins ...plugin) *Encoder {
  17. e := &Encoder{w: w, contentPrefix: contentPrefix, attributePrefix: attrPrefix}
  18. for _, p := range plugins {
  19. e = p.AddToEncoder(e)
  20. }
  21. return e
  22. }
  23. // Encode writes the JSON encoding of v to the stream
  24. func (enc *Encoder) Encode(root *Node) error {
  25. if enc.err != nil {
  26. return enc.err
  27. }
  28. if root == nil {
  29. return nil
  30. }
  31. enc.err = enc.format(root, 0)
  32. // Terminate each value with a newline.
  33. // This makes the output look a little nicer
  34. // when debugging, and some kind of space
  35. // is required if the encoded value was a number,
  36. // so that the reader knows there aren't more
  37. // digits coming.
  38. enc.write("\n")
  39. return enc.err
  40. }
  41. func (enc *Encoder) format(n *Node, lvl int) error {
  42. if n.IsComplex() {
  43. enc.write("{")
  44. // Add data as an additional attibute (if any)
  45. if len(n.Data) > 0 {
  46. enc.write("\"")
  47. enc.write(enc.contentPrefix)
  48. enc.write("content")
  49. enc.write("\": ")
  50. enc.write(sanitiseString(n.Data))
  51. enc.write(", ")
  52. }
  53. i := 0
  54. tot := len(n.Children)
  55. for label, children := range n.Children {
  56. enc.write("\"")
  57. enc.write(label)
  58. enc.write("\": ")
  59. if n.ChildrenAlwaysAsArray || len(children) > 1 {
  60. // Array
  61. enc.write("[")
  62. for j, c := range children {
  63. enc.format(c, lvl+1)
  64. if j < len(children)-1 {
  65. enc.write(", ")
  66. }
  67. }
  68. enc.write("]")
  69. } else {
  70. // Map
  71. enc.format(children[0], lvl+1)
  72. }
  73. if i < tot-1 {
  74. enc.write(", ")
  75. }
  76. i++
  77. }
  78. enc.write("}")
  79. } else {
  80. s := sanitiseString(n.Data)
  81. if enc.tc == nil {
  82. // do nothing
  83. } else {
  84. s = enc.tc.Convert(s)
  85. }
  86. enc.write(s)
  87. }
  88. return nil
  89. }
  90. func (enc *Encoder) write(s string) {
  91. enc.w.Write([]byte(s))
  92. }
  93. // https://golang.org/src/encoding/json/encode.go?s=5584:5627#L788
  94. var hex = "0123456789abcdef"
  95. func sanitiseString(s string) string {
  96. var buf bytes.Buffer
  97. buf.WriteByte('"')
  98. start := 0
  99. for i := 0; i < len(s); {
  100. if b := s[i]; b < utf8.RuneSelf {
  101. if 0x20 <= b && b != '\\' && b != '"' && b != '<' && b != '>' && b != '&' {
  102. i++
  103. continue
  104. }
  105. if start < i {
  106. buf.WriteString(s[start:i])
  107. }
  108. switch b {
  109. case '\\', '"':
  110. buf.WriteByte('\\')
  111. buf.WriteByte(b)
  112. case '\n':
  113. buf.WriteByte('\\')
  114. buf.WriteByte('n')
  115. case '\r':
  116. buf.WriteByte('\\')
  117. buf.WriteByte('r')
  118. case '\t':
  119. buf.WriteByte('\\')
  120. buf.WriteByte('t')
  121. default:
  122. // This encodes bytes < 0x20 except for \n and \r,
  123. // as well as <, > and &. The latter are escaped because they
  124. // can lead to security holes when user-controlled strings
  125. // are rendered into JSON and served to some browsers.
  126. buf.WriteString(`\u00`)
  127. buf.WriteByte(hex[b>>4])
  128. buf.WriteByte(hex[b&0xF])
  129. }
  130. i++
  131. start = i
  132. continue
  133. }
  134. c, size := utf8.DecodeRuneInString(s[i:])
  135. if c == utf8.RuneError && size == 1 {
  136. if start < i {
  137. buf.WriteString(s[start:i])
  138. }
  139. buf.WriteString(`\ufffd`)
  140. i += size
  141. start = i
  142. continue
  143. }
  144. // U+2028 is LINE SEPARATOR.
  145. // U+2029 is PARAGRAPH SEPARATOR.
  146. // They are both technically valid characters in JSON strings,
  147. // but don't work in JSONP, which has to be evaluated as JavaScript,
  148. // and can lead to security holes there. It is valid JSON to
  149. // escape them, so we do so unconditionally.
  150. // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
  151. if c == '\u2028' || c == '\u2029' {
  152. if start < i {
  153. buf.WriteString(s[start:i])
  154. }
  155. buf.WriteString(`\u202`)
  156. buf.WriteByte(hex[c&0xF])
  157. i += size
  158. start = i
  159. continue
  160. }
  161. i += size
  162. }
  163. if start < len(s) {
  164. buf.WriteString(s[start:])
  165. }
  166. buf.WriteByte('"')
  167. return buf.String()
  168. }