parse.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. // Copyright (C) 2016 Kohei YOSHIDA. All rights reserved.
  2. //
  3. // This program is free software; you can redistribute it and/or
  4. // modify it under the terms of The BSD 3-Clause License
  5. // that can be found in the LICENSE file.
  6. package uritemplate
  7. import (
  8. "fmt"
  9. "unicode"
  10. "unicode/utf8"
  11. )
  12. type parseOp int
  13. const (
  14. parseOpSimple parseOp = iota
  15. parseOpPlus
  16. parseOpCrosshatch
  17. parseOpDot
  18. parseOpSlash
  19. parseOpSemicolon
  20. parseOpQuestion
  21. parseOpAmpersand
  22. )
  23. var (
  24. rangeVarchar = &unicode.RangeTable{
  25. R16: []unicode.Range16{
  26. {Lo: 0x0030, Hi: 0x0039, Stride: 1}, // '0' - '9'
  27. {Lo: 0x0041, Hi: 0x005A, Stride: 1}, // 'A' - 'Z'
  28. {Lo: 0x005F, Hi: 0x005F, Stride: 1}, // '_'
  29. {Lo: 0x0061, Hi: 0x007A, Stride: 1}, // 'a' - 'z'
  30. },
  31. LatinOffset: 4,
  32. }
  33. rangeLiterals = &unicode.RangeTable{
  34. R16: []unicode.Range16{
  35. {Lo: 0x0021, Hi: 0x0021, Stride: 1}, // '!'
  36. {Lo: 0x0023, Hi: 0x0024, Stride: 1}, // '#' - '$'
  37. {Lo: 0x0026, Hi: 0x003B, Stride: 1}, // '&' ''' '(' - ';'. '''/27 used to be excluded but an errata is in the review process https://www.rfc-editor.org/errata/eid6937
  38. {Lo: 0x003D, Hi: 0x003D, Stride: 1}, // '='
  39. {Lo: 0x003F, Hi: 0x005B, Stride: 1}, // '?' - '['
  40. {Lo: 0x005D, Hi: 0x005D, Stride: 1}, // ']'
  41. {Lo: 0x005F, Hi: 0x005F, Stride: 1}, // '_'
  42. {Lo: 0x0061, Hi: 0x007A, Stride: 1}, // 'a' - 'z'
  43. {Lo: 0x007E, Hi: 0x007E, Stride: 1}, // '~'
  44. {Lo: 0x00A0, Hi: 0xD7FF, Stride: 1}, // ucschar
  45. {Lo: 0xE000, Hi: 0xF8FF, Stride: 1}, // iprivate
  46. {Lo: 0xF900, Hi: 0xFDCF, Stride: 1}, // ucschar
  47. {Lo: 0xFDF0, Hi: 0xFFEF, Stride: 1}, // ucschar
  48. },
  49. R32: []unicode.Range32{
  50. {Lo: 0x00010000, Hi: 0x0001FFFD, Stride: 1}, // ucschar
  51. {Lo: 0x00020000, Hi: 0x0002FFFD, Stride: 1}, // ucschar
  52. {Lo: 0x00030000, Hi: 0x0003FFFD, Stride: 1}, // ucschar
  53. {Lo: 0x00040000, Hi: 0x0004FFFD, Stride: 1}, // ucschar
  54. {Lo: 0x00050000, Hi: 0x0005FFFD, Stride: 1}, // ucschar
  55. {Lo: 0x00060000, Hi: 0x0006FFFD, Stride: 1}, // ucschar
  56. {Lo: 0x00070000, Hi: 0x0007FFFD, Stride: 1}, // ucschar
  57. {Lo: 0x00080000, Hi: 0x0008FFFD, Stride: 1}, // ucschar
  58. {Lo: 0x00090000, Hi: 0x0009FFFD, Stride: 1}, // ucschar
  59. {Lo: 0x000A0000, Hi: 0x000AFFFD, Stride: 1}, // ucschar
  60. {Lo: 0x000B0000, Hi: 0x000BFFFD, Stride: 1}, // ucschar
  61. {Lo: 0x000C0000, Hi: 0x000CFFFD, Stride: 1}, // ucschar
  62. {Lo: 0x000D0000, Hi: 0x000DFFFD, Stride: 1}, // ucschar
  63. {Lo: 0x000E1000, Hi: 0x000EFFFD, Stride: 1}, // ucschar
  64. {Lo: 0x000F0000, Hi: 0x000FFFFD, Stride: 1}, // iprivate
  65. {Lo: 0x00100000, Hi: 0x0010FFFD, Stride: 1}, // iprivate
  66. },
  67. LatinOffset: 10,
  68. }
  69. )
  70. type parser struct {
  71. r string
  72. start int
  73. stop int
  74. state parseState
  75. }
  76. func (p *parser) errorf(i rune, format string, a ...interface{}) error {
  77. return fmt.Errorf("%s: %s%s", fmt.Sprintf(format, a...), p.r[0:p.stop], string(i))
  78. }
  79. func (p *parser) rune() (rune, int) {
  80. r, size := utf8.DecodeRuneInString(p.r[p.stop:])
  81. if r != utf8.RuneError {
  82. p.stop += size
  83. }
  84. return r, size
  85. }
  86. func (p *parser) unread(r rune) {
  87. p.stop -= utf8.RuneLen(r)
  88. }
  89. type parseState int
  90. const (
  91. parseStateDefault = parseState(iota)
  92. parseStateOperator
  93. parseStateVarList
  94. parseStateVarName
  95. parseStatePrefix
  96. )
  97. func (p *parser) setState(state parseState) {
  98. p.state = state
  99. p.start = p.stop
  100. }
  101. func (p *parser) parseURITemplate() (*Template, error) {
  102. tmpl := Template{
  103. raw: p.r,
  104. exprs: []template{},
  105. }
  106. var exp *expression
  107. for {
  108. r, size := p.rune()
  109. if r == utf8.RuneError {
  110. if size == 0 {
  111. if p.state != parseStateDefault {
  112. return nil, p.errorf('_', "incomplete expression")
  113. }
  114. if p.start < p.stop {
  115. tmpl.exprs = append(tmpl.exprs, literals(p.r[p.start:p.stop]))
  116. }
  117. return &tmpl, nil
  118. }
  119. return nil, p.errorf('_', "invalid UTF-8 sequence")
  120. }
  121. switch p.state {
  122. case parseStateDefault:
  123. switch r {
  124. case '{':
  125. if stop := p.stop - size; stop > p.start {
  126. tmpl.exprs = append(tmpl.exprs, literals(p.r[p.start:stop]))
  127. }
  128. exp = &expression{}
  129. tmpl.exprs = append(tmpl.exprs, exp)
  130. p.setState(parseStateOperator)
  131. case '%':
  132. p.unread(r)
  133. if err := p.consumeTriplet(); err != nil {
  134. return nil, err
  135. }
  136. default:
  137. if !unicode.Is(rangeLiterals, r) {
  138. p.unread(r)
  139. return nil, p.errorf('_', "unacceptable character (hint: use %%XX encoding)")
  140. }
  141. }
  142. case parseStateOperator:
  143. switch r {
  144. default:
  145. p.unread(r)
  146. exp.op = parseOpSimple
  147. case '+':
  148. exp.op = parseOpPlus
  149. case '#':
  150. exp.op = parseOpCrosshatch
  151. case '.':
  152. exp.op = parseOpDot
  153. case '/':
  154. exp.op = parseOpSlash
  155. case ';':
  156. exp.op = parseOpSemicolon
  157. case '?':
  158. exp.op = parseOpQuestion
  159. case '&':
  160. exp.op = parseOpAmpersand
  161. case '=', ',', '!', '@', '|': // op-reserved
  162. return nil, p.errorf('|', "unimplemented operator (op-reserved)")
  163. }
  164. p.setState(parseStateVarName)
  165. case parseStateVarList:
  166. switch r {
  167. case ',':
  168. p.setState(parseStateVarName)
  169. case '}':
  170. exp.init()
  171. p.setState(parseStateDefault)
  172. default:
  173. p.unread(r)
  174. return nil, p.errorf('_', "unrecognized value modifier")
  175. }
  176. case parseStateVarName:
  177. switch r {
  178. case ':', '*':
  179. name := p.r[p.start : p.stop-size]
  180. if !isValidVarname(name) {
  181. return nil, p.errorf('|', "unacceptable variable name")
  182. }
  183. explode := r == '*'
  184. exp.vars = append(exp.vars, varspec{
  185. name: name,
  186. explode: explode,
  187. })
  188. if explode {
  189. p.setState(parseStateVarList)
  190. } else {
  191. p.setState(parseStatePrefix)
  192. }
  193. case ',', '}':
  194. p.unread(r)
  195. name := p.r[p.start:p.stop]
  196. if !isValidVarname(name) {
  197. return nil, p.errorf('|', "unacceptable variable name")
  198. }
  199. exp.vars = append(exp.vars, varspec{
  200. name: name,
  201. })
  202. p.setState(parseStateVarList)
  203. case '%':
  204. p.unread(r)
  205. if err := p.consumeTriplet(); err != nil {
  206. return nil, err
  207. }
  208. case '.':
  209. if dot := p.stop - size; dot == p.start || p.r[dot-1] == '.' {
  210. return nil, p.errorf('|', "unacceptable variable name")
  211. }
  212. default:
  213. if !unicode.Is(rangeVarchar, r) {
  214. p.unread(r)
  215. return nil, p.errorf('_', "unacceptable variable name")
  216. }
  217. }
  218. case parseStatePrefix:
  219. spec := &(exp.vars[len(exp.vars)-1])
  220. switch {
  221. case '0' <= r && r <= '9':
  222. spec.maxlen *= 10
  223. spec.maxlen += int(r - '0')
  224. if spec.maxlen == 0 || spec.maxlen > 9999 {
  225. return nil, p.errorf('|', "max-length must be (0, 9999]")
  226. }
  227. default:
  228. p.unread(r)
  229. if spec.maxlen == 0 {
  230. return nil, p.errorf('_', "max-length must be (0, 9999]")
  231. }
  232. p.setState(parseStateVarList)
  233. }
  234. default:
  235. p.unread(r)
  236. panic(p.errorf('_', "unhandled parseState(%d)", p.state))
  237. }
  238. }
  239. }
  240. func isValidVarname(name string) bool {
  241. if l := len(name); l == 0 || name[0] == '.' || name[l-1] == '.' {
  242. return false
  243. }
  244. for i := 1; i < len(name)-1; i++ {
  245. switch c := name[i]; c {
  246. case '.':
  247. if name[i-1] == '.' {
  248. return false
  249. }
  250. }
  251. }
  252. return true
  253. }
  254. func (p *parser) consumeTriplet() error {
  255. if len(p.r)-p.stop < 3 || p.r[p.stop] != '%' || !ishex(p.r[p.stop+1]) || !ishex(p.r[p.stop+2]) {
  256. return p.errorf('_', "incomplete pct-encodeed")
  257. }
  258. p.stop += 3
  259. return nil
  260. }