| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277 |
- // Copyright (C) 2016 Kohei YOSHIDA. All rights reserved.
- //
- // This program is free software; you can redistribute it and/or
- // modify it under the terms of The BSD 3-Clause License
- // that can be found in the LICENSE file.
- package uritemplate
- import (
- "fmt"
- "unicode"
- "unicode/utf8"
- )
// parseOp identifies the operator that introduces a template expression.
type parseOp int

// Expression operators. Apart from parseOpSimple, each one is selected by
// the character that immediately follows '{'.
const (
	parseOpSimple     = parseOp(iota) // no operator character
	parseOpPlus                       // '+'
	parseOpCrosshatch                 // '#'
	parseOpDot                        // '.'
	parseOpSlash                      // '/'
	parseOpSemicolon                  // ';'
	parseOpQuestion                   // '?'
	parseOpAmpersand                  // '&'
)
var (
	// rangeVarchar lists the runes accepted unescaped inside a variable
	// name: ASCII digits, ASCII letters and '_'.
	rangeVarchar = &unicode.RangeTable{
		R16: []unicode.Range16{
			{Lo: 0x0030, Hi: 0x0039, Stride: 1}, // '0' - '9'
			{Lo: 0x0041, Hi: 0x005A, Stride: 1}, // 'A' - 'Z'
			{Lo: 0x005F, Hi: 0x005F, Stride: 1}, // '_'
			{Lo: 0x0061, Hi: 0x007A, Stride: 1}, // 'a' - 'z'
		},
		LatinOffset: 4,
	}

	// rangeLiterals lists the runes accepted unescaped in the literal
	// (non-expression) part of a template.
	rangeLiterals = &unicode.RangeTable{
		R16: []unicode.Range16{
			{Lo: 0x0021, Hi: 0x0021, Stride: 1}, // '!'
			{Lo: 0x0023, Hi: 0x0024, Stride: 1}, // '#' - '$'
			// '&' - ';'. The apostrophe (0x27) used to be excluded, but an
			// erratum that allows it is under review:
			// https://www.rfc-editor.org/errata/eid6937
			{Lo: 0x0026, Hi: 0x003B, Stride: 1},
			{Lo: 0x003D, Hi: 0x003D, Stride: 1}, // '='
			{Lo: 0x003F, Hi: 0x005B, Stride: 1}, // '?' - '['
			{Lo: 0x005D, Hi: 0x005D, Stride: 1}, // ']'
			{Lo: 0x005F, Hi: 0x005F, Stride: 1}, // '_'
			{Lo: 0x0061, Hi: 0x007A, Stride: 1}, // 'a' - 'z'
			{Lo: 0x007E, Hi: 0x007E, Stride: 1}, // '~'
			{Lo: 0x00A0, Hi: 0xD7FF, Stride: 1}, // ucschar
			{Lo: 0xE000, Hi: 0xF8FF, Stride: 1}, // iprivate
			{Lo: 0xF900, Hi: 0xFDCF, Stride: 1}, // ucschar
			{Lo: 0xFDF0, Hi: 0xFFEF, Stride: 1}, // ucschar
		},
		R32: []unicode.Range32{
			{Lo: 0x00010000, Hi: 0x0001FFFD, Stride: 1}, // ucschar
			{Lo: 0x00020000, Hi: 0x0002FFFD, Stride: 1}, // ucschar
			{Lo: 0x00030000, Hi: 0x0003FFFD, Stride: 1}, // ucschar
			{Lo: 0x00040000, Hi: 0x0004FFFD, Stride: 1}, // ucschar
			{Lo: 0x00050000, Hi: 0x0005FFFD, Stride: 1}, // ucschar
			{Lo: 0x00060000, Hi: 0x0006FFFD, Stride: 1}, // ucschar
			{Lo: 0x00070000, Hi: 0x0007FFFD, Stride: 1}, // ucschar
			{Lo: 0x00080000, Hi: 0x0008FFFD, Stride: 1}, // ucschar
			{Lo: 0x00090000, Hi: 0x0009FFFD, Stride: 1}, // ucschar
			{Lo: 0x000A0000, Hi: 0x000AFFFD, Stride: 1}, // ucschar
			{Lo: 0x000B0000, Hi: 0x000BFFFD, Stride: 1}, // ucschar
			{Lo: 0x000C0000, Hi: 0x000CFFFD, Stride: 1}, // ucschar
			{Lo: 0x000D0000, Hi: 0x000DFFFD, Stride: 1}, // ucschar
			{Lo: 0x000E1000, Hi: 0x000EFFFD, Stride: 1}, // ucschar
			{Lo: 0x000F0000, Hi: 0x000FFFFD, Stride: 1}, // iprivate
			{Lo: 0x00100000, Hi: 0x0010FFFD, Stride: 1}, // iprivate
		},
		// LatinOffset must equal the number of R16 entries whose Hi is at
		// most unicode.MaxLatin1: the nine ranges from '!' through '~'.
		LatinOffset: 9,
	}
)
- type parser struct {
- r string
- start int
- stop int
- state parseState
- }
- func (p *parser) errorf(i rune, format string, a ...interface{}) error {
- return fmt.Errorf("%s: %s%s", fmt.Sprintf(format, a...), p.r[0:p.stop], string(i))
- }
- func (p *parser) rune() (rune, int) {
- r, size := utf8.DecodeRuneInString(p.r[p.stop:])
- if r != utf8.RuneError {
- p.stop += size
- }
- return r, size
- }
- func (p *parser) unread(r rune) {
- p.stop -= utf8.RuneLen(r)
- }
// parseState names the states of the template parser's state machine.
type parseState int

const (
	parseStateDefault  parseState = iota // literal text outside any expression
	parseStateOperator                   // just after '{'; an operator may follow
	parseStateVarList                    // after a variable; ',' or '}' expected
	parseStateVarName                    // reading a variable name
	parseStatePrefix                     // reading the max-length digits after ':'
)
- func (p *parser) setState(state parseState) {
- p.state = state
- p.start = p.stop
- }
- func (p *parser) parseURITemplate() (*Template, error) {
- tmpl := Template{
- raw: p.r,
- exprs: []template{},
- }
- var exp *expression
- for {
- r, size := p.rune()
- if r == utf8.RuneError {
- if size == 0 {
- if p.state != parseStateDefault {
- return nil, p.errorf('_', "incomplete expression")
- }
- if p.start < p.stop {
- tmpl.exprs = append(tmpl.exprs, literals(p.r[p.start:p.stop]))
- }
- return &tmpl, nil
- }
- return nil, p.errorf('_', "invalid UTF-8 sequence")
- }
- switch p.state {
- case parseStateDefault:
- switch r {
- case '{':
- if stop := p.stop - size; stop > p.start {
- tmpl.exprs = append(tmpl.exprs, literals(p.r[p.start:stop]))
- }
- exp = &expression{}
- tmpl.exprs = append(tmpl.exprs, exp)
- p.setState(parseStateOperator)
- case '%':
- p.unread(r)
- if err := p.consumeTriplet(); err != nil {
- return nil, err
- }
- default:
- if !unicode.Is(rangeLiterals, r) {
- p.unread(r)
- return nil, p.errorf('_', "unacceptable character (hint: use %%XX encoding)")
- }
- }
- case parseStateOperator:
- switch r {
- default:
- p.unread(r)
- exp.op = parseOpSimple
- case '+':
- exp.op = parseOpPlus
- case '#':
- exp.op = parseOpCrosshatch
- case '.':
- exp.op = parseOpDot
- case '/':
- exp.op = parseOpSlash
- case ';':
- exp.op = parseOpSemicolon
- case '?':
- exp.op = parseOpQuestion
- case '&':
- exp.op = parseOpAmpersand
- case '=', ',', '!', '@', '|': // op-reserved
- return nil, p.errorf('|', "unimplemented operator (op-reserved)")
- }
- p.setState(parseStateVarName)
- case parseStateVarList:
- switch r {
- case ',':
- p.setState(parseStateVarName)
- case '}':
- exp.init()
- p.setState(parseStateDefault)
- default:
- p.unread(r)
- return nil, p.errorf('_', "unrecognized value modifier")
- }
- case parseStateVarName:
- switch r {
- case ':', '*':
- name := p.r[p.start : p.stop-size]
- if !isValidVarname(name) {
- return nil, p.errorf('|', "unacceptable variable name")
- }
- explode := r == '*'
- exp.vars = append(exp.vars, varspec{
- name: name,
- explode: explode,
- })
- if explode {
- p.setState(parseStateVarList)
- } else {
- p.setState(parseStatePrefix)
- }
- case ',', '}':
- p.unread(r)
- name := p.r[p.start:p.stop]
- if !isValidVarname(name) {
- return nil, p.errorf('|', "unacceptable variable name")
- }
- exp.vars = append(exp.vars, varspec{
- name: name,
- })
- p.setState(parseStateVarList)
- case '%':
- p.unread(r)
- if err := p.consumeTriplet(); err != nil {
- return nil, err
- }
- case '.':
- if dot := p.stop - size; dot == p.start || p.r[dot-1] == '.' {
- return nil, p.errorf('|', "unacceptable variable name")
- }
- default:
- if !unicode.Is(rangeVarchar, r) {
- p.unread(r)
- return nil, p.errorf('_', "unacceptable variable name")
- }
- }
- case parseStatePrefix:
- spec := &(exp.vars[len(exp.vars)-1])
- switch {
- case '0' <= r && r <= '9':
- spec.maxlen *= 10
- spec.maxlen += int(r - '0')
- if spec.maxlen == 0 || spec.maxlen > 9999 {
- return nil, p.errorf('|', "max-length must be (0, 9999]")
- }
- default:
- p.unread(r)
- if spec.maxlen == 0 {
- return nil, p.errorf('_', "max-length must be (0, 9999]")
- }
- p.setState(parseStateVarList)
- }
- default:
- p.unread(r)
- panic(p.errorf('_', "unhandled parseState(%d)", p.state))
- }
- }
- }
// isValidVarname reports whether name is non-empty, neither starts nor ends
// with '.', and contains no two consecutive dots. It checks only the dots;
// the other bytes are not inspected.
func isValidVarname(name string) bool {
	n := len(name)
	if n == 0 {
		return false
	}
	if name[0] == '.' || name[n-1] == '.' {
		return false
	}
	prev := name[0]
	for i := 1; i < n; i++ {
		cur := name[i]
		if cur == '.' && prev == '.' {
			return false
		}
		prev = cur
	}
	return true
}
- func (p *parser) consumeTriplet() error {
- if len(p.r)-p.stop < 3 || p.r[p.stop] != '%' || !ishex(p.r[p.stop+1]) || !ishex(p.r[p.stop+2]) {
- return p.errorf('_', "incomplete pct-encodeed")
- }
- p.stop += 3
- return nil
- }
|