| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767 |
- // Unless explicitly stated otherwise all files in this repository are licensed
- // under the Apache License Version 2.0.
- // This product includes software developed at Datadog (https://www.datadoghq.com/).
- // Copyright 2016-present Datadog, Inc.
- package obfuscate
- import (
- "bytes"
- "fmt"
- "unicode"
- "unicode/utf8"
- )
- // tokenizer.go implements a lexer-like iterator that tokenizes SQL and CQL
- // strings, so that an external component can filter or alter each token of the
- // string. This implementation can't be used as a real SQL lexer (so a parser
- // cannot build the AST) because many rules are ignored to make the tokenizer
- // simpler.
- // This implementation was inspired by https://github.com/youtube/vitess sql parser
- // TODO: add the license to the NOTICE file
// TokenKind specifies the type of the token being scanned. It may be one of the defined
// constants below or in some cases the actual rune itself.
type TokenKind uint32

// EndChar is used to signal that the scanner has finished reading the query. This happens when
// there are no more characters left in the query or when invalid encoding is discovered. EndChar
// is an invalid rune value that can not be found in any valid string.
const EndChar = unicode.MaxRune + 1
// list of available tokens; this list has been reduced because we don't
// need a full-fledged tokenizer to implement a Lexer
//
// NOTE(review): the iota base 57346 places these values above any valid rune,
// so they can share the TokenKind space with raw runes — presumably inherited
// from the vitess parser's token numbering; confirm before renumbering.
const (
	LexError = TokenKind(57346) + iota
	ID
	Limit
	Null
	String
	DoubleQuotedString
	DollarQuotedString // https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING
	DollarQuotedFunc   // a dollar-quoted string delimited by the tag "$func$"; gets special treatment when feature "dollar_quoted_func" is set
	Number
	BooleanLiteral
	ValueArg
	ListArg
	Comment
	Variable
	Savepoint
	PreparedStatement
	EscapeSequence
	NullSafeEqual
	LE
	GE
	NE
	Not
	As
	From
	Update
	Insert
	Into
	Join
	TableName
	ColonCast

	// FilteredGroupable specifies that the given token has been discarded by one of the
	// token filters and that it is groupable together with consecutive FilteredGroupable
	// tokens.
	FilteredGroupable

	// FilteredGroupableParenthesis is a parenthesis marked as filtered groupable. It is the
	// beginning of either a group of values ('(') or a nested query. We track is as
	// a special case for when it may start a nested query as opposed to just another
	// value group to be obfuscated.
	FilteredGroupableParenthesis

	// Filtered specifies that the token is a comma and was discarded by one
	// of the filters.
	Filtered

	// FilteredBracketedIdentifier specifies that we are currently discarding
	// a bracketed identifier (MSSQL).
	// See issue https://github.com/DataDog/datadog-trace-agent/issues/475.
	FilteredBracketedIdentifier
)
// tokenKindStrings maps each TokenKind constant to a human-readable name,
// used by TokenKind.String. Raw-rune kinds are intentionally absent.
var tokenKindStrings = map[TokenKind]string{
	LexError:                     "LexError",
	ID:                           "ID",
	Limit:                        "Limit",
	Null:                         "Null",
	String:                       "String",
	DoubleQuotedString:           "DoubleQuotedString",
	DollarQuotedString:           "DollarQuotedString",
	DollarQuotedFunc:             "DollarQuotedFunc",
	Number:                       "Number",
	BooleanLiteral:               "BooleanLiteral",
	ValueArg:                     "ValueArg",
	ListArg:                      "ListArg",
	Comment:                      "Comment",
	Variable:                     "Variable",
	Savepoint:                    "Savepoint",
	PreparedStatement:            "PreparedStatement",
	EscapeSequence:               "EscapeSequence",
	NullSafeEqual:                "NullSafeEqual",
	LE:                           "LE",
	GE:                           "GE",
	NE:                           "NE",
	Not:                          "NOT", // NOTE(review): upper-cased unlike the other entries — confirm nothing relies on this exact string before normalizing
	As:                           "As",
	From:                         "From",
	Update:                       "Update",
	Insert:                       "Insert",
	Into:                         "Into",
	Join:                         "Join",
	TableName:                    "TableName",
	ColonCast:                    "ColonCast",
	FilteredGroupable:            "FilteredGroupable",
	FilteredGroupableParenthesis: "FilteredGroupableParenthesis",
	Filtered:                     "Filtered",
	FilteredBracketedIdentifier:  "FilteredBracketedIdentifier",
}
- func (k TokenKind) String() string {
- str, ok := tokenKindStrings[k]
- if !ok {
- return "<unknown>"
- }
- return str
- }
- const escapeCharacter = '\\'
// SQLTokenizer is the struct used to generate SQL
// tokens for the parser.
type SQLTokenizer struct {
	pos      int    // byte offset of lastChar
	lastChar rune   // last read rune
	buf      []byte // buf holds the query that we are parsing
	off      int    // off is the index into buf where the unread portion of the query begins.
	err      error  // any error occurred while reading

	curlys uint32 // number of active open curly braces in top-level SQL escape sequences.

	literalEscapes bool // indicates we should not treat backslashes as escape characters
	seenEscape     bool // indicates whether this tokenizer has seen an escape character within a string

	// cfg holds obfuscation settings; NewSQLTokenizer guarantees it is non-nil.
	cfg *SQLConfig
}
- // NewSQLTokenizer creates a new SQLTokenizer for the given SQL string. The literalEscapes argument specifies
- // whether escape characters should be treated literally or as such.
- func NewSQLTokenizer(sql string, literalEscapes bool, cfg *SQLConfig) *SQLTokenizer {
- if cfg == nil {
- cfg = new(SQLConfig)
- }
- return &SQLTokenizer{
- buf: []byte(sql),
- cfg: cfg,
- literalEscapes: literalEscapes,
- }
- }
- // Reset the underlying buffer and positions
- func (tkn *SQLTokenizer) Reset(in string) {
- tkn.pos = 0
- tkn.lastChar = 0
- tkn.buf = []byte(in)
- tkn.off = 0
- tkn.err = nil
- }
// keywords used to recognize string tokens.
// Keys are upper-case: scanIdentifier upper-cases the scanned identifier
// before looking it up here, making keyword matching case-insensitive.
var keywords = map[string]TokenKind{
	"NULL":      Null,
	"TRUE":      BooleanLiteral,
	"FALSE":     BooleanLiteral,
	"SAVEPOINT": Savepoint,
	"LIMIT":     Limit,
	"AS":        As,
	"FROM":      From,
	"UPDATE":    Update,
	"INSERT":    Insert,
	"INTO":      Into,
	"JOIN":      Join,
}
- // Err returns the last error that the tokenizer encountered, or nil.
- func (tkn *SQLTokenizer) Err() error { return tkn.err }
- func (tkn *SQLTokenizer) setErr(format string, args ...interface{}) {
- if tkn.err != nil {
- return
- }
- tkn.err = fmt.Errorf("at position %d: %v", tkn.pos, fmt.Errorf(format, args...))
- }
// SeenEscape returns whether or not this tokenizer has seen an escape character within a scanned string.
// The flag is set by scanString and is never cleared except by Reset.
func (tkn *SQLTokenizer) SeenEscape() bool { return tkn.seenEscape }
// Scan scans the tokenizer for the next token and returns
// the token type and the token buffer.
// Single-rune operators are returned with their rune value as the TokenKind.
func (tkn *SQLTokenizer) Scan() (TokenKind, []byte) {
	if tkn.lastChar == 0 {
		// First Scan on this buffer: prime lastChar with the first rune.
		tkn.advance()
	}
	tkn.skipBlank()

	switch ch := tkn.lastChar; {
	case isLeadingLetter(ch):
		return tkn.scanIdentifier()
	case isDigit(ch):
		return tkn.scanNumber(false)
	default:
		// Consume ch; tkn.lastChar is now the rune after it.
		tkn.advance()
		if tkn.lastChar == EndChar && tkn.err != nil {
			// advance discovered an invalid encoding. We should return early.
			return LexError, nil
		}
		switch ch {
		case EndChar:
			if tkn.err != nil {
				return LexError, nil
			}
			return EndChar, nil
		case ':':
			if tkn.lastChar == ':' {
				tkn.advance()
				return ColonCast, []byte("::")
			}
			if unicode.IsSpace(tkn.lastChar) {
				// example scenario: "autovacuum: VACUUM ANALYZE fake.table"
				return TokenKind(ch), tkn.bytes()
			}
			if tkn.lastChar != '=' {
				return tkn.scanBindVar()
			}
			// NOTE(review): for ":=", execution falls through to the '~' case
			// below, whose default branch returns ':' as a single-rune token —
			// confirm this handling of ":=" is intentional.
			fallthrough
		case '~':
			switch tkn.lastChar {
			case '*':
				// case-insensitive regex match operator "~*"
				tkn.advance()
				return TokenKind('~'), []byte("~*")
			default:
				return TokenKind(ch), tkn.bytes()
			}
		case '=', ',', ';', '(', ')', '+', '*', '&', '|', '^', '[', ']', '?':
			// single-rune operators and punctuation
			return TokenKind(ch), tkn.bytes()
		case '.':
			if isDigit(tkn.lastChar) {
				// a number such as ".5" with no integer part
				return tkn.scanNumber(true)
			}
			return TokenKind(ch), tkn.bytes()
		case '/':
			switch tkn.lastChar {
			case '/':
				tkn.advance()
				return tkn.scanCommentType1("//")
			case '*':
				tkn.advance()
				return tkn.scanCommentType2()
			default:
				// division operator
				return TokenKind(ch), tkn.bytes()
			}
		case '-':
			switch {
			case tkn.lastChar == '-':
				tkn.advance()
				return tkn.scanCommentType1("--")
			case isDigit(tkn.lastChar):
				// negative number literal: scan digits, then re-attach the sign
				tkn.advance()
				kind, tokenBytes := tkn.scanNumber(false)
				return kind, append([]byte{'-'}, tokenBytes...)
			default:
				// subtraction operator
				return TokenKind(ch), tkn.bytes()
			}
		case '#':
			tkn.advance()
			return tkn.scanCommentType1("#")
		case '<':
			switch tkn.lastChar {
			case '>':
				tkn.advance()
				return NE, []byte("<>")
			case '=':
				tkn.advance()
				switch tkn.lastChar {
				case '>':
					tkn.advance()
					return NullSafeEqual, []byte("<=>")
				default:
					return LE, []byte("<=")
				}
			default:
				return TokenKind(ch), tkn.bytes()
			}
		case '>':
			if tkn.lastChar == '=' {
				tkn.advance()
				return GE, []byte(">=")
			}
			return TokenKind(ch), tkn.bytes()
		case '!':
			switch tkn.lastChar {
			case '=':
				tkn.advance()
				return NE, []byte("!=")
			case '~':
				tkn.advance()
				switch tkn.lastChar {
				case '*':
					tkn.advance()
					return NE, []byte("!~*")
				default:
					return NE, []byte("!~")
				}
			default:
				if isValidCharAfterOperator(tkn.lastChar) {
					return Not, tkn.bytes()
				}
				tkn.setErr(`unexpected char "%c" (%d) after "!"`, tkn.lastChar, tkn.lastChar)
				return LexError, tkn.bytes()
			}
		case '\'':
			return tkn.scanString(ch, String)
		case '"':
			return tkn.scanString(ch, DoubleQuotedString)
		case '`':
			// backtick-quoted identifier (MySQL style)
			return tkn.scanString(ch, ID)
		case '%':
			if tkn.lastChar == '(' {
				// named variable, e.g. "%(name)s"
				return tkn.scanVariableIdentifier('%')
			}
			if isLetter(tkn.lastChar) {
				// format parameter (e.g. '%s')
				return tkn.scanFormatParameter('%')
			}
			// modulo operator (e.g. 'id % 8')
			return TokenKind(ch), tkn.bytes()
		case '$':
			if isDigit(tkn.lastChar) {
				// TODO(gbbr): the first digit after $ does not necessarily guarantee
				// that this isn't a dollar-quoted string constant. We might eventually
				// want to cover for this use-case too (e.g. $1$some text$1$).
				return tkn.scanPreparedStatement('$')
			}
			kind, tok := tkn.scanDollarQuotedString()
			if kind == DollarQuotedFunc {
				// this is considered an embedded query, we should try and
				// obfuscate it
				out, err := attemptObfuscation(NewSQLTokenizer(string(tok), tkn.literalEscapes, tkn.cfg))
				if err != nil {
					// if we can't obfuscate it, treat it as a regular string
					return DollarQuotedString, tok
				}
				tok = append(append([]byte("$func$"), []byte(out.Query)...), []byte("$func$")...)
			}
			return kind, tok
		case '{':
			if tkn.pos == 1 || tkn.curlys > 0 {
				// Do not fully obfuscate top-level SQL escape sequences like {{[?=]call procedure-name[([parameter][,parameter]...)]}.
				// We want these to display a bit more context than just a plain '?'
				// See: https://docs.oracle.com/cd/E13157_01/wlevs/docs30/jdbc_drivers/sqlescape.html
				tkn.curlys++
				return TokenKind(ch), tkn.bytes()
			}
			return tkn.scanEscapeSequence('{')
		case '}':
			if tkn.curlys == 0 {
				// A closing curly brace has no place outside an in-progress top-level SQL escape sequence
				// started by the '{' switch-case.
				tkn.setErr(`unexpected byte %d`, ch)
				return LexError, tkn.bytes()
			}
			tkn.curlys--
			return TokenKind(ch), tkn.bytes()
		default:
			tkn.setErr(`unexpected byte %d`, ch)
			return LexError, tkn.bytes()
		}
	}
}
- func (tkn *SQLTokenizer) skipBlank() {
- for unicode.IsSpace(tkn.lastChar) {
- tkn.advance()
- }
- tkn.bytes()
- }
// toUpper is a modified version of bytes.ToUpper. It returns an upper-cased version of the byte
// slice src with all Unicode letters mapped to their upper case. It is modified to also accept a
// byte slice dst as an argument, the underlying storage of which (up to the capacity of dst)
// will be used as the destination of the upper-case copy of src, if it fits. As a special case,
// toUpper will return src if the byte slice is already upper-case. This function is used rather
// than bytes.ToUpper to improve the memory performance of the obfuscator by saving unnecessary
// allocations happening in bytes.ToUpper.
//
// Note: the result may alias src (already-upper ASCII input), so callers must
// not mutate it while still holding src.
func toUpper(src, dst []byte) []byte {
	// First pass: determine whether src is pure ASCII and whether it contains
	// any lower-case ASCII letters at all.
	isASCII, hasLower := true, false
	for i := 0; i < len(src); i++ {
		c := src[i]
		if c >= utf8.RuneSelf {
			isASCII = false
			break
		}
		hasLower = hasLower || ('a' <= c && c <= 'z')
	}
	if !isASCII {
		// Non-ASCII input is an uncommon case for SQL identifiers; fall back
		// to the (allocating) generic Unicode mapping.
		return bytes.Map(unicode.ToUpper, src)
	}
	if !hasLower {
		// Already upper-case ASCII: just return src, no copy needed.
		return src
	}
	// Only now do we need destination storage; previously this allocation
	// happened even on the two early-return paths above, where it was wasted.
	dst = dst[:0]
	if cap(dst) < len(src) {
		dst = make([]byte, 0, len(src))
	}
	dst = dst[:len(src)]
	for i := 0; i < len(src); i++ {
		c := src[i]
		if 'a' <= c && c <= 'z' {
			c -= 'a' - 'A'
		}
		dst[i] = c
	}
	return dst
}
- func (tkn *SQLTokenizer) scanIdentifier() (TokenKind, []byte) {
- tkn.advance()
- for isLetter(tkn.lastChar) || isDigit(tkn.lastChar) || tkn.lastChar == '.' || tkn.lastChar == '*' {
- tkn.advance()
- }
- t := tkn.bytes()
- // Space allows us to upper-case identifiers 256 bytes long or less without allocating heap
- // storage for them, since space is allocated on the stack. A size of 256 bytes was chosen
- // based on the allowed length of sql identifiers in various sql implementations.
- var space [256]byte
- upper := toUpper(t, space[:0])
- if keywordID, found := keywords[string(upper)]; found {
- return keywordID, t
- }
- return ID, t
- }
// scanVariableIdentifier scans a named variable placeholder such as "%(name)s".
// The prefix rune has already been consumed by the caller; this skips to the
// closing ')', then requires exactly one letter (the format verb) after it.
// NOTE(review): the prefix parameter is never read — presumably kept for
// symmetry with the other scan* helpers.
func (tkn *SQLTokenizer) scanVariableIdentifier(prefix rune) (TokenKind, []byte) {
	// consume everything up to and including the closing parenthesis
	for tkn.advance(); tkn.lastChar != ')' && tkn.lastChar != EndChar; tkn.advance() {
	}
	tkn.advance()
	if !isLetter(tkn.lastChar) {
		tkn.setErr(`invalid character after variable identifier: "%c" (%d)`, tkn.lastChar, tkn.lastChar)
		return LexError, tkn.bytes()
	}
	tkn.advance()
	return Variable, tkn.bytes()
}
// scanFormatParameter scans a printf-style format parameter such as "%s".
// The '%' has already been consumed by Scan; this consumes the single verb
// rune that follows and returns the whole token as a Variable.
// NOTE(review): the prefix parameter is never read.
func (tkn *SQLTokenizer) scanFormatParameter(prefix rune) (TokenKind, []byte) {
	tkn.advance()
	return Variable, tkn.bytes()
}
// scanDollarQuotedString scans a Postgres dollar-quoted string constant.
// See: https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING
// It returns the string's contents (without the delimiters); the kind is
// DollarQuotedFunc when the tag is "$func$" and that feature is enabled,
// DollarQuotedString otherwise.
func (tkn *SQLTokenizer) scanDollarQuotedString() (TokenKind, []byte) {
	// The opening "$tag$" reads like a '$'-delimited string, so reuse
	// scanString to extract the tag.
	kind, tag := tkn.scanString('$', String)
	if kind == LexError {
		return kind, tkn.bytes()
	}
	var (
		got int // number of delimiter bytes matched so far
		buf bytes.Buffer
	)
	delim := tag
	// on empty strings, tkn.scanString returns the delimiters
	if string(delim) != "$$" {
		// on non-empty strings, the delimiter is $tag$
		delim = append([]byte{'$'}, delim...)
		delim = append(delim, '$')
	}
	for {
		ch := tkn.lastChar
		tkn.advance()
		if ch == EndChar {
			tkn.setErr("unexpected EOF in dollar-quoted string")
			return LexError, buf.Bytes()
		}
		// NOTE(review): this compares only the low byte of the current rune
		// against the delimiter; a multi-byte rune could in principle match a
		// delimiter byte — presumably fine since tags are ASCII, but confirm.
		if byte(ch) == delim[got] {
			got++
			if got == len(delim) {
				break
			}
			continue
		}
		if got > 0 {
			// A partial delimiter match turned out to be ordinary content;
			// flush the withheld bytes into the output.
			_, err := buf.Write(delim[:got])
			if err != nil {
				tkn.setErr("error reading dollar-quoted string: %v", err)
				return LexError, buf.Bytes()
			}
			got = 0
		}
		buf.WriteRune(ch)
	}
	if tkn.cfg.DollarQuotedFunc && string(delim) == "$func$" {
		return DollarQuotedFunc, buf.Bytes()
	}
	return DollarQuotedString, buf.Bytes()
}
- func (tkn *SQLTokenizer) scanPreparedStatement(prefix rune) (TokenKind, []byte) {
- // a prepared statement expect a digit identifier like $1
- if !isDigit(tkn.lastChar) {
- tkn.setErr(`prepared statements must start with digits, got "%c" (%d)`, tkn.lastChar, tkn.lastChar)
- return LexError, tkn.bytes()
- }
- // scanNumber keeps the prefix rune intact.
- // read numbers and return an error if any
- token, buff := tkn.scanNumber(false)
- if token == LexError {
- tkn.setErr("invalid number")
- return LexError, tkn.bytes()
- }
- return PreparedStatement, buff
- }
- func (tkn *SQLTokenizer) scanEscapeSequence(braces rune) (TokenKind, []byte) {
- for tkn.lastChar != '}' && tkn.lastChar != EndChar {
- tkn.advance()
- }
- // we've reached the end of the string without finding
- // the closing curly braces
- if tkn.lastChar == EndChar {
- tkn.setErr("unexpected EOF in escape sequence")
- return LexError, tkn.bytes()
- }
- tkn.advance()
- return EscapeSequence, tkn.bytes()
- }
- func (tkn *SQLTokenizer) scanBindVar() (TokenKind, []byte) {
- token := ValueArg
- if tkn.lastChar == ':' {
- token = ListArg
- tkn.advance()
- }
- if !isLetter(tkn.lastChar) {
- tkn.setErr(`bind variables should start with letters, got "%c" (%d)`, tkn.lastChar, tkn.lastChar)
- return LexError, tkn.bytes()
- }
- for isLetter(tkn.lastChar) || isDigit(tkn.lastChar) || tkn.lastChar == '.' {
- tkn.advance()
- }
- return token, tkn.bytes()
- }
- func (tkn *SQLTokenizer) scanMantissa(base int) {
- for digitVal(tkn.lastChar) < base {
- tkn.advance()
- }
- }
// scanNumber scans a numeric literal: decimal, hexadecimal (0x...), octal
// (0...), or float with optional fraction and exponent. seenDecimalPoint is
// true when the caller already consumed a leading '.' (e.g. ".5"), in which
// case scanning starts directly at the fractional digits.
func (tkn *SQLTokenizer) scanNumber(seenDecimalPoint bool) (TokenKind, []byte) {
	if seenDecimalPoint {
		tkn.scanMantissa(10)
		goto exponent
	}

	if tkn.lastChar == '0' {
		// int or float
		tkn.advance()
		if tkn.lastChar == 'x' || tkn.lastChar == 'X' {
			// hexadecimal int
			tkn.advance()
			tkn.scanMantissa(16)
		} else {
			// octal int or float
			seenDecimalDigit := false
			tkn.scanMantissa(8)
			if tkn.lastChar == '8' || tkn.lastChar == '9' {
				// illegal octal int or float; remember it, but keep scanning:
				// it may still turn out to be a valid decimal float below.
				seenDecimalDigit = true
				tkn.scanMantissa(10)
			}
			if tkn.lastChar == '.' || tkn.lastChar == 'e' || tkn.lastChar == 'E' {
				goto fraction
			}
			// octal int
			if seenDecimalDigit {
				// tkn.setErr called in caller
				return LexError, tkn.bytes()
			}
		}
		goto exit
	}

	// decimal int or float
	tkn.scanMantissa(10)

fraction:
	if tkn.lastChar == '.' {
		tkn.advance()
		tkn.scanMantissa(10)
	}

exponent:
	if tkn.lastChar == 'e' || tkn.lastChar == 'E' {
		tkn.advance()
		if tkn.lastChar == '+' || tkn.lastChar == '-' {
			tkn.advance()
		}
		tkn.scanMantissa(10)
	}

exit:
	t := tkn.bytes()
	if len(t) == 0 {
		return LexError, nil
	}
	return Number, t
}
// scanString scans a string enclosed by delim and returns its unescaped
// contents with the given kind. Doubled delimiters and (unless
// literalEscapes is set) backslash escapes embed the delimiter in the string.
func (tkn *SQLTokenizer) scanString(delim rune, kind TokenKind) (TokenKind, []byte) {
	// NOTE(review): buf deliberately aliases the front of tkn.buf as scratch
	// space; since unescaping only ever shrinks the content, the writes appear
	// to trail the read cursor — confirm no caller retains those bytes.
	buf := bytes.NewBuffer(tkn.buf[:0])
	for {
		ch := tkn.lastChar
		tkn.advance()
		if ch == delim {
			if tkn.lastChar == delim {
				// doubling a delimiter is the default way to embed the delimiter within a string
				tkn.advance()
			} else {
				// a single delimiter denotes the end of the string
				break
			}
		} else if ch == escapeCharacter {
			tkn.seenEscape = true

			if !tkn.literalEscapes {
				// treat as an escape character: emit the escaped rune verbatim
				ch = tkn.lastChar
				tkn.advance()
			}
		}
		if ch == EndChar {
			tkn.setErr("unexpected EOF in string")
			return LexError, buf.Bytes()
		}
		buf.WriteRune(ch)
	}
	// NOTE(review): by Go operator precedence this reads as
	// (kind == ID && buf.Len() == 0) || <content is empty-or-whitespace-only>,
	// so the delimiters are restored for whitespace-only strings of ANY kind,
	// not just identifiers — confirm that is the intended behavior.
	if kind == ID && buf.Len() == 0 || bytes.IndexFunc(buf.Bytes(), func(r rune) bool { return !unicode.IsSpace(r) }) == -1 {
		// This string is an empty or white-space only identifier.
		// We should keep the start and end delimiters in order to
		// avoid creating invalid queries.
		// See: https://github.com/DataDog/datadog-trace-agent/issues/316
		return kind, append(runeBytes(delim), runeBytes(delim)...)
	}
	return kind, buf.Bytes()
}
- func (tkn *SQLTokenizer) scanCommentType1(prefix string) (TokenKind, []byte) {
- for tkn.lastChar != EndChar {
- if tkn.lastChar == '\n' {
- tkn.advance()
- break
- }
- tkn.advance()
- }
- return Comment, tkn.bytes()
- }
- func (tkn *SQLTokenizer) scanCommentType2() (TokenKind, []byte) {
- for {
- if tkn.lastChar == '*' {
- tkn.advance()
- if tkn.lastChar == '/' {
- tkn.advance()
- break
- }
- continue
- }
- if tkn.lastChar == EndChar {
- tkn.setErr("unexpected EOF in comment")
- return LexError, tkn.bytes()
- }
- tkn.advance()
- }
- return Comment, tkn.bytes()
- }
// advance advances the tokenizer to the next rune. If the decoder encounters an error decoding, or
// the end of the buffer is reached, tkn.lastChar will be set to EndChar. In case of a decoding
// error, tkn.err will also be set.
func (tkn *SQLTokenizer) advance() {
	ch, n := utf8.DecodeRune(tkn.buf[tkn.off:])
	if ch == utf8.RuneError && n < 2 {
		// n == 0 means the buffer is exhausted; n == 1 means invalid UTF-8.
		// (A genuine U+FFFD in the input decodes with n == 3 and is fine.)
		tkn.pos++
		tkn.lastChar = EndChar
		if n == 1 {
			tkn.setErr("invalid UTF-8 encoding beginning with 0x%x", tkn.buf[tkn.off])
		}
		return
	}
	if tkn.lastChar != 0 || tkn.pos > 0 {
		// we are past the first character — pos tracks the byte offset of
		// lastChar, so it lags one rune behind off
		tkn.pos += n
	}
	tkn.off += n
	tkn.lastChar = ch
}
// bytes returns all the bytes that were advanced over since its last call.
// This excludes tkn.lastChar, which will remain in the buffer.
// As a side effect it re-slices tkn.buf so the returned span is dropped and
// resets tkn.off accordingly.
func (tkn *SQLTokenizer) bytes() []byte {
	if tkn.lastChar == EndChar {
		// nothing is held back: everything up to off has been consumed
		ret := tkn.buf[:tkn.off]
		tkn.buf = tkn.buf[tkn.off:]
		tkn.off = 0
		return ret
	}
	// keep the bytes of lastChar in the buffer, since it has not been
	// consumed by the caller yet
	lastLen := utf8.RuneLen(tkn.lastChar)
	ret := tkn.buf[:tkn.off-lastLen]
	tkn.buf = tkn.buf[tkn.off-lastLen:]
	tkn.off = lastLen
	return ret
}
// isLeadingLetter reports whether ch may start an identifier:
// any Unicode letter, '_', or '@'.
func isLeadingLetter(ch rune) bool {
	switch ch {
	case '_', '@':
		return true
	}
	return unicode.IsLetter(ch)
}
- func isLetter(ch rune) bool {
- return isLeadingLetter(ch) || ch == '#'
- }
// digitVal returns the numeric value of ch as a hexadecimal digit, or 16
// (larger than any legal digit value) when ch is not a hex digit.
func digitVal(ch rune) int {
	if '0' <= ch && ch <= '9' {
		return int(ch - '0')
	}
	if 'a' <= ch && ch <= 'f' {
		return int(ch-'a') + 10
	}
	if 'A' <= ch && ch <= 'F' {
		return int(ch-'A') + 10
	}
	// larger than any legal digit val
	return 16
}
// isDigit reports whether ch is an ASCII decimal digit.
func isDigit(ch rune) bool {
	return ch >= '0' && ch <= '9'
}
// runeBytes returns the UTF-8 encoding of r as a freshly allocated byte slice.
func runeBytes(r rune) []byte {
	encoded := make([]byte, utf8.UTFMax)
	return encoded[:utf8.EncodeRune(encoded, r)]
}
- // isValidCharAfterOperator returns true if c is a valid character after an operator
- func isValidCharAfterOperator(c rune) bool {
- return c == '(' || c == '`' || c == '\'' || c == '"' || c == '+' || c == '-' || unicode.IsSpace(c) || isLetter(c) || isDigit(c)
- }
|