parser.go 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283
  1. package jsonparser
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "strconv"
  7. )
  // Sentinel errors produced by this package. Code in this file compares
  // against them by identity, so the identifiers and messages must stay stable.
  var (
  	// KeyPathNotFoundError is returned when the requested key path is absent.
  	KeyPathNotFoundError = errors.New("Key path not found")

  	// UnknownValueTypeError is returned when a value does not start like any
  	// recognised JSON value, or is a misspelled literal.
  	UnknownValueTypeError = errors.New("Unknown value type")

  	// MalformedJsonError is the generic error for structurally broken input.
  	MalformedJsonError = errors.New("Malformed JSON error")

  	// MalformedStringError is returned for a string without a closing quote.
  	MalformedStringError = errors.New("Value is string, but can't find closing '\"' symbol")

  	// MalformedArrayError is returned for an array without a closing bracket.
  	MalformedArrayError = errors.New("Value is array, but can't find closing ']' symbol")

  	// MalformedObjectError is returned for an object without a closing brace.
  	MalformedObjectError = errors.New("Value looks like object, but can't find closing '}' symbol")

  	// MalformedValueError is returned when the end of a bare value cannot be found.
  	MalformedValueError = errors.New("Value looks like Number/Boolean/None, but can't find its end: ',' or '}' symbol")

  	// OverflowIntegerError signals integer overflow while parsing a number.
  	OverflowIntegerError = errors.New("Value is number, but overflowed while parsing")

  	// MalformedStringEscapeError is returned for an invalid escape sequence.
  	MalformedStringEscapeError = errors.New("Encountered an invalid escape sequence in a string")
  )
  // unescapeStackBufSize is the size, in bytes, of the fixed scratch arrays
  // handed to Unescape when unescaping JSON strings. Strings that need more
  // room than this are expected to fall back to a heap allocation.
  const (
  	unescapeStackBufSize = 64
  )
  // tokenEnd returns the index of the first byte in data that ends a bare token:
  // a space, newline, carriage return, tab, ',', '}' or ']'. When data holds no
  // such byte it returns len(data).
  func tokenEnd(data []byte) int {
  	for pos := 0; pos < len(data); pos++ {
  		b := data[pos]
  		if b == ' ' || b == '\n' || b == '\r' || b == '\t' || b == ',' || b == '}' || b == ']' {
  			return pos
  		}
  	}
  	return len(data)
  }
  // findTokenStart scans data backwards for token and returns its index. The
  // scan gives up and returns 0 as soon as it meets '[' or '{' (unless that byte
  // is itself the token), and also returns 0 when nothing is found.
  func findTokenStart(data []byte, token byte) int {
  	pos := len(data)
  	for pos > 0 {
  		pos--
  		b := data[pos]
  		if b == token {
  			return pos
  		}
  		if b == '[' || b == '{' {
  			return 0
  		}
  	}
  	return 0
  }
  43. func findKeyStart(data []byte, key string) (int, error) {
  44. i := 0
  45. ln := len(data)
  46. if ln > 0 && (data[0] == '{' || data[0] == '[') {
  47. i = 1
  48. }
  49. var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
  50. if ku, err := Unescape(StringToBytes(key), stackbuf[:]); err == nil {
  51. key = bytesToString(&ku)
  52. }
  53. for i < ln {
  54. switch data[i] {
  55. case '"':
  56. i++
  57. keyBegin := i
  58. strEnd, keyEscaped := stringEnd(data[i:])
  59. if strEnd == -1 {
  60. break
  61. }
  62. i += strEnd
  63. keyEnd := i - 1
  64. valueOffset := nextToken(data[i:])
  65. if valueOffset == -1 {
  66. break
  67. }
  68. i += valueOffset
  69. // if string is a key, and key level match
  70. k := data[keyBegin:keyEnd]
  71. // for unescape: if there are no escape sequences, this is cheap; if there are, it is a
  72. // bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize
  73. if keyEscaped {
  74. if ku, err := Unescape(k, stackbuf[:]); err != nil {
  75. break
  76. } else {
  77. k = ku
  78. }
  79. }
  80. if data[i] == ':' && len(key) == len(k) && bytesToString(&k) == key {
  81. return keyBegin - 1, nil
  82. }
  83. case '[':
  84. end := blockEnd(data[i:], data[i], ']')
  85. if end != -1 {
  86. i = i + end
  87. }
  88. case '{':
  89. end := blockEnd(data[i:], data[i], '}')
  90. if end != -1 {
  91. i = i + end
  92. }
  93. }
  94. i++
  95. }
  96. return -1, KeyPathNotFoundError
  97. }
  // tokenStart scans data backwards and returns the index of the last newline,
  // carriage return, tab, ',', '{' or '['. It returns 0 when none is present.
  // A plain space is not treated as a boundary here.
  func tokenStart(data []byte) int {
  	pos := len(data)
  	for pos > 0 {
  		pos--
  		b := data[pos]
  		if b == '\n' || b == '\r' || b == '\t' || b == ',' || b == '{' || b == '[' {
  			return pos
  		}
  	}
  	return 0
  }
  // nextToken returns the index of the first byte in data that is not a space,
  // newline, carriage return or tab, or -1 when data is empty or all whitespace.
  func nextToken(data []byte) int {
  	for pos := 0; pos < len(data); pos++ {
  		b := data[pos]
  		if b != ' ' && b != '\n' && b != '\r' && b != '\t' {
  			return pos
  		}
  	}
  	return -1
  }
  // lastToken returns the index of the last byte in data that is not a space,
  // newline, carriage return or tab, or -1 when data is empty or all whitespace.
  func lastToken(data []byte) int {
  	pos := len(data)
  	for pos > 0 {
  		pos--
  		b := data[pos]
  		if b != ' ' && b != '\n' && b != '\r' && b != '\t' {
  			return pos
  		}
  	}
  	return -1
  }
  // stringEnd finds the closing quote of a JSON string. data must start just
  // after the opening quote.
  //
  // It returns the index one past the closing quote, and whether a backslash
  // was seen before it (meaning the contents may need unescaping). A quote
  // preceded by an odd number of backslashes is escaped and does not end the
  // string. If no closing quote exists the index is -1.
  func stringEnd(data []byte) (int, bool) {
  	sawBackslash := false
  	for pos := 0; pos < len(data); pos++ {
  		switch data[pos] {
  		case '\\':
  			sawBackslash = true
  		case '"':
  			if !sawBackslash {
  				return pos + 1, false
  			}
  			// Count the run of backslashes directly before this quote.
  			run := 0
  			for k := pos - 1; k >= 0 && data[k] == '\\'; k-- {
  				run++
  			}
  			if run%2 == 0 {
  				return pos + 1, true // the backslashes escape each other, not the quote
  			}
  		}
  	}
  	return -1, sawBackslash
  }
  158. // Find end of the data structure, array or object.
  159. // For array openSym and closeSym will be '[' and ']', for object '{' and '}'
  160. func blockEnd(data []byte, openSym byte, closeSym byte) int {
  161. level := 0
  162. i := 0
  163. ln := len(data)
  164. for i < ln {
  165. switch data[i] {
  166. case '"': // If inside string, skip it
  167. se, _ := stringEnd(data[i+1:])
  168. if se == -1 {
  169. return -1
  170. }
  171. i += se
  172. case openSym: // If open symbol, increase level
  173. level++
  174. case closeSym: // If close symbol, increase level
  175. level--
  176. // If we have returned to the original level, we're done
  177. if level == 0 {
  178. return i + 1
  179. }
  180. }
  181. i++
  182. }
  183. return -1
  184. }
  185. func searchKeys(data []byte, keys ...string) int {
  186. keyLevel := 0
  187. level := 0
  188. i := 0
  189. ln := len(data)
  190. lk := len(keys)
  191. lastMatched := true
  192. if lk == 0 {
  193. return 0
  194. }
  195. var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
  196. for i < ln {
  197. switch data[i] {
  198. case '"':
  199. i++
  200. keyBegin := i
  201. strEnd, keyEscaped := stringEnd(data[i:])
  202. if strEnd == -1 {
  203. return -1
  204. }
  205. i += strEnd
  206. keyEnd := i - 1
  207. valueOffset := nextToken(data[i:])
  208. if valueOffset == -1 {
  209. return -1
  210. }
  211. i += valueOffset
  212. // if string is a key
  213. if data[i] == ':' {
  214. if level < 1 {
  215. return -1
  216. }
  217. key := data[keyBegin:keyEnd]
  218. // for unescape: if there are no escape sequences, this is cheap; if there are, it is a
  219. // bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize
  220. var keyUnesc []byte
  221. if !keyEscaped {
  222. keyUnesc = key
  223. } else if ku, err := Unescape(key, stackbuf[:]); err != nil {
  224. return -1
  225. } else {
  226. keyUnesc = ku
  227. }
  228. if level <= len(keys) {
  229. if equalStr(&keyUnesc, keys[level-1]) {
  230. lastMatched = true
  231. // if key level match
  232. if keyLevel == level-1 {
  233. keyLevel++
  234. // If we found all keys in path
  235. if keyLevel == lk {
  236. return i + 1
  237. }
  238. }
  239. } else {
  240. lastMatched = false
  241. }
  242. } else {
  243. return -1
  244. }
  245. } else {
  246. i--
  247. }
  248. case '{':
  249. // in case parent key is matched then only we will increase the level otherwise can directly
  250. // can move to the end of this block
  251. if !lastMatched {
  252. end := blockEnd(data[i:], '{', '}')
  253. if end == -1 {
  254. return -1
  255. }
  256. i += end - 1
  257. } else {
  258. level++
  259. }
  260. case '}':
  261. level--
  262. if level == keyLevel {
  263. keyLevel--
  264. }
  265. case '[':
  266. // If we want to get array element by index
  267. if keyLevel == level && keys[level][0] == '[' {
  268. var keyLen = len(keys[level])
  269. if keyLen < 3 || keys[level][0] != '[' || keys[level][keyLen-1] != ']' {
  270. return -1
  271. }
  272. aIdx, err := strconv.Atoi(keys[level][1 : keyLen-1])
  273. if err != nil {
  274. return -1
  275. }
  276. var curIdx int
  277. var valueFound []byte
  278. var valueOffset int
  279. var curI = i
  280. ArrayEach(data[i:], func(value []byte, dataType ValueType, offset int, err error) {
  281. if curIdx == aIdx {
  282. valueFound = value
  283. valueOffset = offset
  284. if dataType == String {
  285. valueOffset = valueOffset - 2
  286. valueFound = data[curI+valueOffset : curI+valueOffset+len(value)+2]
  287. }
  288. }
  289. curIdx += 1
  290. })
  291. if valueFound == nil {
  292. return -1
  293. } else {
  294. subIndex := searchKeys(valueFound, keys[level+1:]...)
  295. if subIndex < 0 {
  296. return -1
  297. }
  298. return i + valueOffset + subIndex
  299. }
  300. } else {
  301. // Do not search for keys inside arrays
  302. if arraySkip := blockEnd(data[i:], '[', ']'); arraySkip == -1 {
  303. return -1
  304. } else {
  305. i += arraySkip - 1
  306. }
  307. }
  308. case ':': // If encountered, JSON data is malformed
  309. return -1
  310. }
  311. i++
  312. }
  313. return -1
  314. }
  // sameTree reports whether p1 and p2 agree on every element up to the length
  // of the shorter one, i.e. whether one path is a prefix of the other. An
  // empty path is a prefix of every path.
  func sameTree(p1, p2 []string) bool {
  	shared := len(p1)
  	if len(p2) < shared {
  		shared = len(p2)
  	}

  	for idx := 0; idx < shared; idx++ {
  		if p1[idx] != p2[idx] {
  			return false
  		}
  	}

  	return true
  }
  327. func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]string) int {
  328. var x struct{}
  329. pathFlags := make([]bool, len(paths))
  330. var level, pathsMatched, i int
  331. ln := len(data)
  332. var maxPath int
  333. for _, p := range paths {
  334. if len(p) > maxPath {
  335. maxPath = len(p)
  336. }
  337. }
  338. pathsBuf := make([]string, maxPath)
  339. for i < ln {
  340. switch data[i] {
  341. case '"':
  342. i++
  343. keyBegin := i
  344. strEnd, keyEscaped := stringEnd(data[i:])
  345. if strEnd == -1 {
  346. return -1
  347. }
  348. i += strEnd
  349. keyEnd := i - 1
  350. valueOffset := nextToken(data[i:])
  351. if valueOffset == -1 {
  352. return -1
  353. }
  354. i += valueOffset
  355. // if string is a key, and key level match
  356. if data[i] == ':' {
  357. match := -1
  358. key := data[keyBegin:keyEnd]
  359. // for unescape: if there are no escape sequences, this is cheap; if there are, it is a
  360. // bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize
  361. var keyUnesc []byte
  362. if !keyEscaped {
  363. keyUnesc = key
  364. } else {
  365. var stackbuf [unescapeStackBufSize]byte
  366. if ku, err := Unescape(key, stackbuf[:]); err != nil {
  367. return -1
  368. } else {
  369. keyUnesc = ku
  370. }
  371. }
  372. if maxPath >= level {
  373. if level < 1 {
  374. cb(-1, nil, Unknown, MalformedJsonError)
  375. return -1
  376. }
  377. pathsBuf[level-1] = bytesToString(&keyUnesc)
  378. for pi, p := range paths {
  379. if len(p) != level || pathFlags[pi] || !equalStr(&keyUnesc, p[level-1]) || !sameTree(p, pathsBuf[:level]) {
  380. continue
  381. }
  382. match = pi
  383. pathsMatched++
  384. pathFlags[pi] = true
  385. v, dt, _, e := Get(data[i+1:])
  386. cb(pi, v, dt, e)
  387. if pathsMatched == len(paths) {
  388. break
  389. }
  390. }
  391. if pathsMatched == len(paths) {
  392. return i
  393. }
  394. }
  395. if match == -1 {
  396. tokenOffset := nextToken(data[i+1:])
  397. i += tokenOffset
  398. if data[i] == '{' {
  399. blockSkip := blockEnd(data[i:], '{', '}')
  400. i += blockSkip + 1
  401. }
  402. }
  403. if i < ln {
  404. switch data[i] {
  405. case '{', '}', '[', '"':
  406. i--
  407. }
  408. }
  409. } else {
  410. i--
  411. }
  412. case '{':
  413. level++
  414. case '}':
  415. level--
  416. case '[':
  417. var ok bool
  418. arrIdxFlags := make(map[int]struct{})
  419. pIdxFlags := make([]bool, len(paths))
  420. if level < 0 {
  421. cb(-1, nil, Unknown, MalformedJsonError)
  422. return -1
  423. }
  424. for pi, p := range paths {
  425. if len(p) < level+1 || pathFlags[pi] || p[level][0] != '[' || !sameTree(p, pathsBuf[:level]) {
  426. continue
  427. }
  428. if len(p[level]) >= 2 {
  429. aIdx, _ := strconv.Atoi(p[level][1 : len(p[level])-1])
  430. arrIdxFlags[aIdx] = x
  431. pIdxFlags[pi] = true
  432. }
  433. }
  434. if len(arrIdxFlags) > 0 {
  435. level++
  436. var curIdx int
  437. arrOff, _ := ArrayEach(data[i:], func(value []byte, dataType ValueType, offset int, err error) {
  438. if _, ok = arrIdxFlags[curIdx]; ok {
  439. for pi, p := range paths {
  440. if pIdxFlags[pi] {
  441. aIdx, _ := strconv.Atoi(p[level-1][1 : len(p[level-1])-1])
  442. if curIdx == aIdx {
  443. of := searchKeys(value, p[level:]...)
  444. pathsMatched++
  445. pathFlags[pi] = true
  446. if of != -1 {
  447. v, dt, _, e := Get(value[of:])
  448. cb(pi, v, dt, e)
  449. }
  450. }
  451. }
  452. }
  453. }
  454. curIdx += 1
  455. })
  456. if pathsMatched == len(paths) {
  457. return i
  458. }
  459. i += arrOff - 1
  460. } else {
  461. // Do not search for keys inside arrays
  462. if arraySkip := blockEnd(data[i:], '[', ']'); arraySkip == -1 {
  463. return -1
  464. } else {
  465. i += arraySkip - 1
  466. }
  467. }
  468. case ']':
  469. level--
  470. }
  471. i++
  472. }
  473. return -1
  474. }
  // ValueType identifies the kind of a JSON value.
  type ValueType int

  // The value kinds. NotExist is the zero value; Unknown marks a value whose
  // kind could not be determined.
  const (
  	NotExist ValueType = iota
  	String
  	Number
  	Object
  	Array
  	Boolean
  	Null
  	Unknown
  )

  // String returns a lower-case name for vt. Unknown and any value outside the
  // declared range yield "unknown".
  func (vt ValueType) String() string {
  	names := [...]string{
  		NotExist: "non-existent",
  		String:   "string",
  		Number:   "number",
  		Object:   "object",
  		Array:    "array",
  		Boolean:  "boolean",
  		Null:     "null",
  	}
  	if vt >= 0 && int(vt) < len(names) {
  		return names[vt]
  	}
  	return "unknown"
  }
  // Byte forms of the JSON literals, used by getType to recognise booleans
  // and null.
  var (
  	trueLiteral  = []byte("true")
  	falseLiteral = []byte("false")
  	nullLiteral  = []byte("null")
  )
  // createInsertComponent renders the JSON fragment that creates the path keys
  // with setValue as its innermost value.
  //
  // A key that starts with '[' is treated as an array index and produces an
  // array wrapper; any other key, including the empty string, produces a
  // quoted object key. If comma is true the fragment starts with ",". If
  // object is true and the first key is not an index, the fragment is wrapped
  // in "{" and "}".
  //
  // keys must contain at least one element.
  func createInsertComponent(keys []string, setValue []byte, comma, object bool) []byte {
  	// Checking the length first lets an empty key be written as "" instead
  	// of indexing into an empty string.
  	isIndex := len(keys[0]) > 0 && keys[0][0] == '['
  	offset := 0
  	buffer := make([]byte, calcAllocateSpace(keys, setValue, comma, object))

  	if comma {
  		offset += WriteToBuffer(buffer[offset:], ",")
  	}
  	if isIndex && !comma {
  		offset += WriteToBuffer(buffer[offset:], "[")
  	} else {
  		if object {
  			offset += WriteToBuffer(buffer[offset:], "{")
  		}
  		if !isIndex {
  			offset += WriteToBuffer(buffer[offset:], "\"")
  			offset += WriteToBuffer(buffer[offset:], keys[0])
  			offset += WriteToBuffer(buffer[offset:], "\":")
  		}
  	}

  	// Open one container per remaining path component.
  	for i := 1; i < len(keys); i++ {
  		if len(keys[i]) > 0 && keys[i][0] == '[' {
  			offset += WriteToBuffer(buffer[offset:], "[")
  		} else {
  			offset += WriteToBuffer(buffer[offset:], "{\"")
  			offset += WriteToBuffer(buffer[offset:], keys[i])
  			offset += WriteToBuffer(buffer[offset:], "\":")
  		}
  	}

  	offset += copy(buffer[offset:], setValue)

  	// Close the containers in reverse order.
  	for i := len(keys) - 1; i > 0; i-- {
  		if len(keys[i]) > 0 && keys[i][0] == '[' {
  			offset += WriteToBuffer(buffer[offset:], "]")
  		} else {
  			offset += WriteToBuffer(buffer[offset:], "}")
  		}
  	}
  	if isIndex && !comma {
  		offset += WriteToBuffer(buffer[offset:], "]")
  	}
  	if object && !isIndex {
  		offset += WriteToBuffer(buffer[offset:], "}")
  	}
  	return buffer
  }

  // calcAllocateSpace returns the exact number of bytes createInsertComponent
  // writes for the same arguments. The two functions must be kept in step.
  //
  // keys must contain at least one element.
  func calcAllocateSpace(keys []string, setValue []byte, comma, object bool) int {
  	isIndex := len(keys[0]) > 0 && keys[0][0] == '['
  	lk := 0
  	if comma {
  		// ,
  		lk++
  	}
  	if isIndex && !comma {
  		// []
  		lk += 2
  	} else {
  		if object {
  			// {
  			lk++
  		}
  		if !isIndex {
  			// "keys[0]":
  			lk += len(keys[0]) + 3
  		}
  	}

  	lk += len(setValue)
  	for i := 1; i < len(keys); i++ {
  		if len(keys[i]) > 0 && keys[i][0] == '[' {
  			// []
  			lk += 2
  		} else {
  			// {"keys[i]":}
  			lk += len(keys[i]) + 5
  		}
  	}

  	if object && !isIndex {
  		// }
  		lk++
  	}

  	return lk
  }

  // WriteToBuffer copies str into buffer and returns len(str). The return
  // value is len(str) even when buffer is too short to hold all of it.
  func WriteToBuffer(buffer []byte, str string) int {
  	copy(buffer, str)
  	return len(str)
  }
  597. /*
  598. Del - Receives existing data structure, path to delete.
  599. Returns:
  600. `data` - return modified data
  601. */
  602. func Delete(data []byte, keys ...string) []byte {
  603. lk := len(keys)
  604. if lk == 0 {
  605. return data[:0]
  606. }
  607. array := false
  608. if len(keys[lk-1]) > 0 && string(keys[lk-1][0]) == "[" {
  609. array = true
  610. }
  611. var startOffset, keyOffset int
  612. endOffset := len(data)
  613. var err error
  614. if !array {
  615. if len(keys) > 1 {
  616. _, _, startOffset, endOffset, err = internalGet(data, keys[:lk-1]...)
  617. if err == KeyPathNotFoundError {
  618. // problem parsing the data
  619. return data
  620. }
  621. }
  622. keyOffset, err = findKeyStart(data[startOffset:endOffset], keys[lk-1])
  623. if err == KeyPathNotFoundError {
  624. // problem parsing the data
  625. return data
  626. }
  627. keyOffset += startOffset
  628. _, _, _, subEndOffset, _ := internalGet(data[startOffset:endOffset], keys[lk-1])
  629. endOffset = startOffset + subEndOffset
  630. tokEnd := tokenEnd(data[endOffset:])
  631. tokStart := findTokenStart(data[:keyOffset], ","[0])
  632. if data[endOffset+tokEnd] == ","[0] {
  633. endOffset += tokEnd + 1
  634. } else if data[endOffset+tokEnd] == " "[0] && len(data) > endOffset+tokEnd+1 && data[endOffset+tokEnd+1] == ","[0] {
  635. endOffset += tokEnd + 2
  636. } else if data[endOffset+tokEnd] == "}"[0] && data[tokStart] == ","[0] {
  637. keyOffset = tokStart
  638. }
  639. } else {
  640. _, _, keyOffset, endOffset, err = internalGet(data, keys...)
  641. if err == KeyPathNotFoundError {
  642. // problem parsing the data
  643. return data
  644. }
  645. tokEnd := tokenEnd(data[endOffset:])
  646. tokStart := findTokenStart(data[:keyOffset], ","[0])
  647. if data[endOffset+tokEnd] == ","[0] {
  648. endOffset += tokEnd + 1
  649. } else if data[endOffset+tokEnd] == "]"[0] && data[tokStart] == ","[0] {
  650. keyOffset = tokStart
  651. }
  652. }
  653. // We need to remove remaining trailing comma if we delete las element in the object
  654. prevTok := lastToken(data[:keyOffset])
  655. remainedValue := data[endOffset:]
  656. var newOffset int
  657. if nextToken(remainedValue) > -1 && remainedValue[nextToken(remainedValue)] == '}' && data[prevTok] == ',' {
  658. newOffset = prevTok
  659. } else {
  660. newOffset = prevTok + 1
  661. }
  662. // We have to make a copy here if we don't want to mangle the original data, because byte slices are
  663. // accessed by reference and not by value
  664. dataCopy := make([]byte, len(data))
  665. copy(dataCopy, data)
  666. data = append(dataCopy[:newOffset], dataCopy[endOffset:]...)
  667. return data
  668. }
  669. /*
  670. Set - Receives existing data structure, path to set, and data to set at that key.
  671. Returns:
  672. `value` - modified byte array
  673. `err` - On any parsing error
  674. */
  675. func Set(data []byte, setValue []byte, keys ...string) (value []byte, err error) {
  676. // ensure keys are set
  677. if len(keys) == 0 {
  678. return nil, KeyPathNotFoundError
  679. }
  680. _, _, startOffset, endOffset, err := internalGet(data, keys...)
  681. if err != nil {
  682. if err != KeyPathNotFoundError {
  683. // problem parsing the data
  684. return nil, err
  685. }
  686. // full path doesnt exist
  687. // does any subpath exist?
  688. var depth int
  689. for i := range keys {
  690. _, _, start, end, sErr := internalGet(data, keys[:i+1]...)
  691. if sErr != nil {
  692. break
  693. } else {
  694. endOffset = end
  695. startOffset = start
  696. depth++
  697. }
  698. }
  699. comma := true
  700. object := false
  701. if endOffset == -1 {
  702. firstToken := nextToken(data)
  703. // We can't set a top-level key if data isn't an object
  704. if firstToken < 0 || data[firstToken] != '{' {
  705. return nil, KeyPathNotFoundError
  706. }
  707. // Don't need a comma if the input is an empty object
  708. secondToken := firstToken + 1 + nextToken(data[firstToken+1:])
  709. if data[secondToken] == '}' {
  710. comma = false
  711. }
  712. // Set the top level key at the end (accounting for any trailing whitespace)
  713. // This assumes last token is valid like '}', could check and return error
  714. endOffset = lastToken(data)
  715. }
  716. depthOffset := endOffset
  717. if depth != 0 {
  718. // if subpath is a non-empty object, add to it
  719. // or if subpath is a non-empty array, add to it
  720. if (data[startOffset] == '{' && data[startOffset+1+nextToken(data[startOffset+1:])] != '}') ||
  721. (data[startOffset] == '[' && data[startOffset+1+nextToken(data[startOffset+1:])] == '{') && keys[depth:][0][0] == 91 {
  722. depthOffset--
  723. startOffset = depthOffset
  724. // otherwise, over-write it with a new object
  725. } else {
  726. comma = false
  727. object = true
  728. }
  729. } else {
  730. startOffset = depthOffset
  731. }
  732. value = append(data[:startOffset], append(createInsertComponent(keys[depth:], setValue, comma, object), data[depthOffset:]...)...)
  733. } else {
  734. // path currently exists
  735. startComponent := data[:startOffset]
  736. endComponent := data[endOffset:]
  737. value = make([]byte, len(startComponent)+len(endComponent)+len(setValue))
  738. newEndOffset := startOffset + len(setValue)
  739. copy(value[0:startOffset], startComponent)
  740. copy(value[startOffset:newEndOffset], setValue)
  741. copy(value[newEndOffset:], endComponent)
  742. }
  743. return value, nil
  744. }
  745. func getType(data []byte, offset int) ([]byte, ValueType, int, error) {
  746. var dataType ValueType
  747. endOffset := offset
  748. // if string value
  749. if data[offset] == '"' {
  750. dataType = String
  751. if idx, _ := stringEnd(data[offset+1:]); idx != -1 {
  752. endOffset += idx + 1
  753. } else {
  754. return nil, dataType, offset, MalformedStringError
  755. }
  756. } else if data[offset] == '[' { // if array value
  757. dataType = Array
  758. // break label, for stopping nested loops
  759. endOffset = blockEnd(data[offset:], '[', ']')
  760. if endOffset == -1 {
  761. return nil, dataType, offset, MalformedArrayError
  762. }
  763. endOffset += offset
  764. } else if data[offset] == '{' { // if object value
  765. dataType = Object
  766. // break label, for stopping nested loops
  767. endOffset = blockEnd(data[offset:], '{', '}')
  768. if endOffset == -1 {
  769. return nil, dataType, offset, MalformedObjectError
  770. }
  771. endOffset += offset
  772. } else {
  773. // Number, Boolean or None
  774. end := tokenEnd(data[endOffset:])
  775. if end == -1 {
  776. return nil, dataType, offset, MalformedValueError
  777. }
  778. value := data[offset : endOffset+end]
  779. switch data[offset] {
  780. case 't', 'f': // true or false
  781. if bytes.Equal(value, trueLiteral) || bytes.Equal(value, falseLiteral) {
  782. dataType = Boolean
  783. } else {
  784. return nil, Unknown, offset, UnknownValueTypeError
  785. }
  786. case 'u', 'n': // undefined or null
  787. if bytes.Equal(value, nullLiteral) {
  788. dataType = Null
  789. } else {
  790. return nil, Unknown, offset, UnknownValueTypeError
  791. }
  792. case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-':
  793. dataType = Number
  794. default:
  795. return nil, Unknown, offset, UnknownValueTypeError
  796. }
  797. endOffset += end
  798. }
  799. return data[offset:endOffset], dataType, endOffset, nil
  800. }
  801. /*
  802. Get - Receives data structure, and key path to extract value from.
  803. Returns:
  804. `value` - Pointer to original data structure containing key value, or just empty slice if nothing found or error
  805. `dataType` - Can be: `NotExist`, `String`, `Number`, `Object`, `Array`, `Boolean` or `Null`
  806. `offset` - Offset from provided data structure where key value ends. Used mostly internally, for example for `ArrayEach` helper.
  807. `err` - If key not found or any other parsing issue it should return error. If key not found it also sets `dataType` to `NotExist`
  808. Accept multiple keys to specify path to JSON value (in case of quering nested structures).
  809. If no keys provided it will try to extract closest JSON value (simple ones or object/array), useful for reading streams or arrays, see `ArrayEach` implementation.
  810. */
  811. func Get(data []byte, keys ...string) (value []byte, dataType ValueType, offset int, err error) {
  812. a, b, _, d, e := internalGet(data, keys...)
  813. return a, b, d, e
  814. }
  815. func internalGet(data []byte, keys ...string) (value []byte, dataType ValueType, offset, endOffset int, err error) {
  816. if len(keys) > 0 {
  817. if offset = searchKeys(data, keys...); offset == -1 {
  818. return nil, NotExist, -1, -1, KeyPathNotFoundError
  819. }
  820. }
  821. // Go to closest value
  822. nO := nextToken(data[offset:])
  823. if nO == -1 {
  824. return nil, NotExist, offset, -1, MalformedJsonError
  825. }
  826. offset += nO
  827. value, dataType, endOffset, err = getType(data, offset)
  828. if err != nil {
  829. return value, dataType, offset, endOffset, err
  830. }
  831. // Strip quotes from string values
  832. if dataType == String {
  833. value = value[1 : len(value)-1]
  834. }
  835. return value[:len(value):len(value)], dataType, offset, endOffset, nil
  836. }
  837. // ArrayEach is used when iterating arrays, accepts a callback function with the same return arguments as `Get`.
  838. func ArrayEach(data []byte, cb func(value []byte, dataType ValueType, offset int, err error), keys ...string) (offset int, err error) {
  839. if len(data) == 0 {
  840. return -1, MalformedObjectError
  841. }
  842. nT := nextToken(data)
  843. if nT == -1 {
  844. return -1, MalformedJsonError
  845. }
  846. offset = nT + 1
  847. if len(keys) > 0 {
  848. if offset = searchKeys(data, keys...); offset == -1 {
  849. return offset, KeyPathNotFoundError
  850. }
  851. // Go to closest value
  852. nO := nextToken(data[offset:])
  853. if nO == -1 {
  854. return offset, MalformedJsonError
  855. }
  856. offset += nO
  857. if data[offset] != '[' {
  858. return offset, MalformedArrayError
  859. }
  860. offset++
  861. }
  862. nO := nextToken(data[offset:])
  863. if nO == -1 {
  864. return offset, MalformedJsonError
  865. }
  866. offset += nO
  867. if data[offset] == ']' {
  868. return offset, nil
  869. }
  870. for true {
  871. v, t, o, e := Get(data[offset:])
  872. if e != nil {
  873. return offset, e
  874. }
  875. if o == 0 {
  876. break
  877. }
  878. if t != NotExist {
  879. cb(v, t, offset+o-len(v), e)
  880. }
  881. if e != nil {
  882. break
  883. }
  884. offset += o
  885. skipToToken := nextToken(data[offset:])
  886. if skipToToken == -1 {
  887. return offset, MalformedArrayError
  888. }
  889. offset += skipToToken
  890. if data[offset] == ']' {
  891. break
  892. }
  893. if data[offset] != ',' {
  894. return offset, MalformedArrayError
  895. }
  896. offset++
  897. }
  898. return offset, nil
  899. }
  900. // ObjectEach iterates over the key-value pairs of a JSON object, invoking a given callback for each such entry
  901. func ObjectEach(data []byte, callback func(key []byte, value []byte, dataType ValueType, offset int) error, keys ...string) (err error) {
  902. offset := 0
  903. // Descend to the desired key, if requested
  904. if len(keys) > 0 {
  905. if off := searchKeys(data, keys...); off == -1 {
  906. return KeyPathNotFoundError
  907. } else {
  908. offset = off
  909. }
  910. }
  911. // Validate and skip past opening brace
  912. if off := nextToken(data[offset:]); off == -1 {
  913. return MalformedObjectError
  914. } else if offset += off; data[offset] != '{' {
  915. return MalformedObjectError
  916. } else {
  917. offset++
  918. }
  919. // Skip to the first token inside the object, or stop if we find the ending brace
  920. if off := nextToken(data[offset:]); off == -1 {
  921. return MalformedJsonError
  922. } else if offset += off; data[offset] == '}' {
  923. return nil
  924. }
  925. // Loop pre-condition: data[offset] points to what should be either the next entry's key, or the closing brace (if it's anything else, the JSON is malformed)
  926. for offset < len(data) {
  927. // Step 1: find the next key
  928. var key []byte
  929. // Check what the the next token is: start of string, end of object, or something else (error)
  930. switch data[offset] {
  931. case '"':
  932. offset++ // accept as string and skip opening quote
  933. case '}':
  934. return nil // we found the end of the object; stop and return success
  935. default:
  936. return MalformedObjectError
  937. }
  938. // Find the end of the key string
  939. var keyEscaped bool
  940. if off, esc := stringEnd(data[offset:]); off == -1 {
  941. return MalformedJsonError
  942. } else {
  943. key, keyEscaped = data[offset:offset+off-1], esc
  944. offset += off
  945. }
  946. // Unescape the string if needed
  947. if keyEscaped {
  948. var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
  949. if keyUnescaped, err := Unescape(key, stackbuf[:]); err != nil {
  950. return MalformedStringEscapeError
  951. } else {
  952. key = keyUnescaped
  953. }
  954. }
  955. // Step 2: skip the colon
  956. if off := nextToken(data[offset:]); off == -1 {
  957. return MalformedJsonError
  958. } else if offset += off; data[offset] != ':' {
  959. return MalformedJsonError
  960. } else {
  961. offset++
  962. }
  963. // Step 3: find the associated value, then invoke the callback
  964. if value, valueType, off, err := Get(data[offset:]); err != nil {
  965. return err
  966. } else if err := callback(key, value, valueType, offset+off); err != nil { // Invoke the callback here!
  967. return err
  968. } else {
  969. offset += off
  970. }
  971. // Step 4: skip over the next comma to the following token, or stop if we hit the ending brace
  972. if off := nextToken(data[offset:]); off == -1 {
  973. return MalformedArrayError
  974. } else {
  975. offset += off
  976. switch data[offset] {
  977. case '}':
  978. return nil // Stop if we hit the close brace
  979. case ',':
  980. offset++ // Ignore the comma
  981. default:
  982. return MalformedObjectError
  983. }
  984. }
  985. // Skip to the next token after the comma
  986. if off := nextToken(data[offset:]); off == -1 {
  987. return MalformedArrayError
  988. } else {
  989. offset += off
  990. }
  991. }
  992. return MalformedObjectError // we shouldn't get here; it's expected that we will return via finding the ending brace
  993. }
  994. // GetUnsafeString returns the value retrieved by `Get`, use creates string without memory allocation by mapping string to slice memory. It does not handle escape symbols.
  995. func GetUnsafeString(data []byte, keys ...string) (val string, err error) {
  996. v, _, _, e := Get(data, keys...)
  997. if e != nil {
  998. return "", e
  999. }
  1000. return bytesToString(&v), nil
  1001. }
  1002. // GetString returns the value retrieved by `Get`, cast to a string if possible, trying to properly handle escape and utf8 symbols
  1003. // If key data type do not match, it will return an error.
  1004. func GetString(data []byte, keys ...string) (val string, err error) {
  1005. v, t, _, e := Get(data, keys...)
  1006. if e != nil {
  1007. return "", e
  1008. }
  1009. if t != String {
  1010. return "", fmt.Errorf("Value is not a string: %s", string(v))
  1011. }
  1012. // If no escapes return raw content
  1013. if bytes.IndexByte(v, '\\') == -1 {
  1014. return string(v), nil
  1015. }
  1016. return ParseString(v)
  1017. }
  1018. // GetFloat returns the value retrieved by `Get`, cast to a float64 if possible.
  1019. // The offset is the same as in `Get`.
  1020. // If key data type do not match, it will return an error.
  1021. func GetFloat(data []byte, keys ...string) (val float64, err error) {
  1022. v, t, _, e := Get(data, keys...)
  1023. if e != nil {
  1024. return 0, e
  1025. }
  1026. if t != Number {
  1027. return 0, fmt.Errorf("Value is not a number: %s", string(v))
  1028. }
  1029. return ParseFloat(v)
  1030. }
  1031. // GetInt returns the value retrieved by `Get`, cast to a int64 if possible.
  1032. // If key data type do not match, it will return an error.
  1033. func GetInt(data []byte, keys ...string) (val int64, err error) {
  1034. v, t, _, e := Get(data, keys...)
  1035. if e != nil {
  1036. return 0, e
  1037. }
  1038. if t != Number {
  1039. return 0, fmt.Errorf("Value is not a number: %s", string(v))
  1040. }
  1041. return ParseInt(v)
  1042. }
  1043. // GetBoolean returns the value retrieved by `Get`, cast to a bool if possible.
  1044. // The offset is the same as in `Get`.
  1045. // If key data type do not match, it will return error.
  1046. func GetBoolean(data []byte, keys ...string) (val bool, err error) {
  1047. v, t, _, e := Get(data, keys...)
  1048. if e != nil {
  1049. return false, e
  1050. }
  1051. if t != Boolean {
  1052. return false, fmt.Errorf("Value is not a boolean: %s", string(v))
  1053. }
  1054. return ParseBoolean(v)
  1055. }
  1056. // ParseBoolean parses a Boolean ValueType into a Go bool (not particularly useful, but here for completeness)
  1057. func ParseBoolean(b []byte) (bool, error) {
  1058. switch {
  1059. case bytes.Equal(b, trueLiteral):
  1060. return true, nil
  1061. case bytes.Equal(b, falseLiteral):
  1062. return false, nil
  1063. default:
  1064. return false, MalformedValueError
  1065. }
  1066. }
  1067. // ParseString parses a String ValueType into a Go string (the main parsing work is unescaping the JSON string)
  1068. func ParseString(b []byte) (string, error) {
  1069. var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
  1070. if bU, err := Unescape(b, stackbuf[:]); err != nil {
  1071. return "", MalformedValueError
  1072. } else {
  1073. return string(bU), nil
  1074. }
  1075. }
  1076. // ParseNumber parses a Number ValueType into a Go float64
  1077. func ParseFloat(b []byte) (float64, error) {
  1078. if v, err := parseFloat(&b); err != nil {
  1079. return 0, MalformedValueError
  1080. } else {
  1081. return v, nil
  1082. }
  1083. }
  1084. // ParseInt parses a Number ValueType into a Go int64
  1085. func ParseInt(b []byte) (int64, error) {
  1086. if v, ok, overflow := parseInt(b); !ok {
  1087. if overflow {
  1088. return 0, OverflowIntegerError
  1089. }
  1090. return 0, MalformedValueError
  1091. } else {
  1092. return v, nil
  1093. }
  1094. }