lex.go 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917
  1. // Copyright 2015 The Prometheus Authors
  2. // Licensed under the Apache License, Version 2.0 (the "License");
  3. // you may not use this file except in compliance with the License.
  4. // You may obtain a copy of the License at
  5. //
  6. // http://www.apache.org/licenses/LICENSE-2.0
  7. //
  8. // Unless required by applicable law or agreed to in writing, software
  9. // distributed under the License is distributed on an "AS IS" BASIS,
  10. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. // See the License for the specific language governing permissions and
  12. // limitations under the License.
  13. package promql
  14. import (
  15. "fmt"
  16. "strings"
  17. "unicode"
  18. "unicode/utf8"
  19. )
// item represents a token or text string returned from the scanner.
// Items are created by lexer.emit and lexer.errorf and delivered to the
// consumer through the lexer's items channel.
type item struct {
	typ ItemType // The type of this item.
	pos Pos      // The starting position, in bytes, of this item in the input string.
	val string   // The value of this item; for ItemError items this is the error message.
}
  26. // String returns a descriptive string for the item.
  27. func (i item) String() string {
  28. switch {
  29. case i.typ == ItemEOF:
  30. return "EOF"
  31. case i.typ == ItemError:
  32. return i.val
  33. case i.typ == ItemIdentifier || i.typ == ItemMetricIdentifier:
  34. return fmt.Sprintf("%q", i.val)
  35. case i.typ.isKeyword():
  36. return fmt.Sprintf("<%s>", i.val)
  37. case i.typ.isOperator():
  38. return fmt.Sprintf("<op:%s>", i.val)
  39. case i.typ.isAggregator():
  40. return fmt.Sprintf("<aggr:%s>", i.val)
  41. case len(i.val) > 10:
  42. return fmt.Sprintf("%.10q...", i.val)
  43. }
  44. return fmt.Sprintf("%q", i.val)
  45. }
  46. // isOperator returns true if the item corresponds to a arithmetic or set operator.
  47. // Returns false otherwise.
  48. func (i ItemType) isOperator() bool { return i > operatorsStart && i < operatorsEnd }
  49. // isAggregator returns true if the item belongs to the aggregator functions.
  50. // Returns false otherwise
  51. func (i ItemType) isAggregator() bool { return i > aggregatorsStart && i < aggregatorsEnd }
  52. // isAggregator returns true if the item is an aggregator that takes a parameter.
  53. // Returns false otherwise
  54. func (i ItemType) isAggregatorWithParam() bool {
  55. return i == ItemTopK || i == ItemBottomK || i == ItemCountValues || i == ItemQuantile
  56. }
  57. // isKeyword returns true if the item corresponds to a keyword.
  58. // Returns false otherwise.
  59. func (i ItemType) isKeyword() bool { return i > keywordsStart && i < keywordsEnd }
  60. // isCompairsonOperator returns true if the item corresponds to a comparison operator.
  61. // Returns false otherwise.
  62. func (i ItemType) isComparisonOperator() bool {
  63. switch i {
  64. case ItemEQL, ItemNEQ, ItemLTE, ItemLSS, ItemGTE, ItemGTR:
  65. return true
  66. default:
  67. return false
  68. }
  69. }
  70. // isSetOperator returns whether the item corresponds to a set operator.
  71. func (i ItemType) isSetOperator() bool {
  72. switch i {
  73. case ItemLAND, ItemLOR, ItemLUnless:
  74. return true
  75. }
  76. return false
  77. }
// LowestPrec is a constant for operator precedence in expressions. It is the
// value ItemType.precedence returns for item types that are not binary
// operators.
const LowestPrec = 0 // Non-operators.
  80. // Precedence returns the operator precedence of the binary
  81. // operator op. If op is not a binary operator, the result
  82. // is LowestPrec.
  83. func (i ItemType) precedence() int {
  84. switch i {
  85. case ItemLOR:
  86. return 1
  87. case ItemLAND, ItemLUnless:
  88. return 2
  89. case ItemEQL, ItemNEQ, ItemLTE, ItemLSS, ItemGTE, ItemGTR:
  90. return 3
  91. case ItemADD, ItemSUB:
  92. return 4
  93. case ItemMUL, ItemDIV, ItemMOD:
  94. return 5
  95. case ItemPOW:
  96. return 6
  97. default:
  98. return LowestPrec
  99. }
  100. }
  101. func (i ItemType) isRightAssociative() bool {
  102. switch i {
  103. case ItemPOW:
  104. return true
  105. default:
  106. return false
  107. }
  108. }
// ItemType identifies the kind of token a lexed item carries.
type ItemType int

// The item types. operatorsStart/operatorsEnd, aggregatorsStart/aggregatorsEnd
// and keywordsStart/keywordsEnd are unexported range markers: isOperator,
// isAggregator and isKeyword classify a type by comparing it against them, so
// the order of the declarations below is significant.
const (
	ItemError ItemType = iota // Error occurred, value is error message
	ItemEOF
	ItemComment
	ItemIdentifier
	ItemMetricIdentifier
	ItemLeftParen
	ItemRightParen
	ItemLeftBrace
	ItemRightBrace
	ItemLeftBracket
	ItemRightBracket
	ItemComma
	ItemAssign
	ItemColon
	ItemSemicolon
	ItemString
	ItemNumber
	ItemDuration
	ItemBlank
	ItemTimes
	ItemSpace

	operatorsStart
	// Operators.
	ItemSUB
	ItemADD
	ItemMUL
	ItemMOD
	ItemDIV
	ItemLAND
	ItemLOR
	ItemLUnless
	ItemEQL
	ItemNEQ
	ItemLTE
	ItemLSS
	ItemGTE
	ItemGTR
	ItemEQLRegex
	ItemNEQRegex
	ItemPOW
	operatorsEnd

	aggregatorsStart
	// Aggregators.
	ItemAvg
	ItemCount
	ItemSum
	ItemMin
	ItemMax
	ItemStddev
	ItemStdvar
	ItemTopK
	ItemBottomK
	ItemCountValues
	ItemQuantile
	aggregatorsEnd

	keywordsStart
	// Keywords.
	ItemOffset
	ItemBy
	ItemWithout
	ItemOn
	ItemIgnoring
	ItemGroupLeft
	ItemGroupRight
	ItemBool
	keywordsEnd
)
// key maps the lower-case spelling of each reserved word to its item type.
// lexKeywordOrIdentifier lower-cases a scanned word before looking it up
// here, and init adds the special numbers "inf" and "nan".
var key = map[string]ItemType{
	// Operators.
	"and":    ItemLAND,
	"or":     ItemLOR,
	"unless": ItemLUnless,

	// Aggregators.
	"sum":          ItemSum,
	"avg":          ItemAvg,
	"count":        ItemCount,
	"min":          ItemMin,
	"max":          ItemMax,
	"stddev":       ItemStddev,
	"stdvar":       ItemStdvar,
	"topk":         ItemTopK,
	"bottomk":      ItemBottomK,
	"count_values": ItemCountValues,
	"quantile":     ItemQuantile,

	// Keywords.
	"offset":      ItemOffset,
	"by":          ItemBy,
	"without":     ItemWithout,
	"on":          ItemOn,
	"ignoring":    ItemIgnoring,
	"group_left":  ItemGroupLeft,
	"group_right": ItemGroupRight,
	"bool":        ItemBool,
}
// These are the default string representations for common items. It does not
// imply that those are the only character sequences that can be lexed to such an item.
//
// init extends this table with one entry per word in key, and
// ItemType.String and item.desc consult it.
var itemTypeStr = map[ItemType]string{
	ItemLeftParen:    "(",
	ItemRightParen:   ")",
	ItemLeftBrace:    "{",
	ItemRightBrace:   "}",
	ItemLeftBracket:  "[",
	ItemRightBracket: "]",
	ItemComma:        ",",
	ItemAssign:       "=",
	ItemColon:        ":",
	ItemSemicolon:    ";",
	ItemBlank:        "_",
	ItemTimes:        "x",
	ItemSpace:        "<space>",

	ItemSUB:      "-",
	ItemADD:      "+",
	ItemMUL:      "*",
	ItemMOD:      "%",
	ItemDIV:      "/",
	ItemEQL:      "==",
	ItemNEQ:      "!=",
	ItemLTE:      "<=",
	ItemLSS:      "<",
	ItemGTE:      ">=",
	ItemGTR:      ">",
	ItemEQLRegex: "=~",
	ItemNEQRegex: "!~",
	ItemPOW:      "^",
}
  236. func init() {
  237. // Add keywords to item type strings.
  238. for s, ty := range key {
  239. itemTypeStr[ty] = s
  240. }
  241. // Special numbers.
  242. key["inf"] = ItemNumber
  243. key["nan"] = ItemNumber
  244. }
  245. func (i ItemType) String() string {
  246. if s, ok := itemTypeStr[i]; ok {
  247. return s
  248. }
  249. return fmt.Sprintf("<item %d>", i)
  250. }
  251. func (i item) desc() string {
  252. if _, ok := itemTypeStr[i.typ]; ok {
  253. return i.String()
  254. }
  255. if i.typ == ItemEOF {
  256. return i.typ.desc()
  257. }
  258. return fmt.Sprintf("%s %s", i.typ.desc(), i)
  259. }
  260. func (i ItemType) desc() string {
  261. switch i {
  262. case ItemError:
  263. return "error"
  264. case ItemEOF:
  265. return "end of input"
  266. case ItemComment:
  267. return "comment"
  268. case ItemIdentifier:
  269. return "identifier"
  270. case ItemMetricIdentifier:
  271. return "metric identifier"
  272. case ItemString:
  273. return "string"
  274. case ItemNumber:
  275. return "number"
  276. case ItemDuration:
  277. return "duration"
  278. }
  279. return fmt.Sprintf("%q", i)
  280. }
// eof is the sentinel value lexer.next returns once the input is exhausted.
const eof = -1
// stateFn represents the state of the scanner as a function that returns the next state.
// A nil stateFn ends the loop in lexer.run.
type stateFn func(*lexer) stateFn
// Pos is the position in a string. The lexer uses it as a byte offset into
// its input.
type Pos int
// lexer holds the state of the scanner. The state functions run in the
// goroutine started by lex and hand items to the consumer over the items
// channel.
type lexer struct {
	input   string    // The string being scanned.
	state   stateFn   // The next lexing function to enter.
	pos     Pos       // Current position in the input.
	start   Pos       // Start position of this item.
	width   Pos       // Width of last rune read from input.
	lastPos Pos       // Position of most recent item returned by nextItem.
	items   chan item // Channel of scanned items.

	parenDepth  int  // Nesting depth of ( ) exprs.
	braceOpen   bool // Whether a { is opened.
	bracketOpen bool // Whether a [ is opened.
	gotColon    bool // Whether we got a ':' after [ was opened.
	stringOpen  rune // Quote rune of the string currently being read.

	// seriesDesc is set when a series description for the testing
	// language is lexed.
	seriesDesc bool
}
  304. // next returns the next rune in the input.
  305. func (l *lexer) next() rune {
  306. if int(l.pos) >= len(l.input) {
  307. l.width = 0
  308. return eof
  309. }
  310. r, w := utf8.DecodeRuneInString(l.input[l.pos:])
  311. l.width = Pos(w)
  312. l.pos += l.width
  313. return r
  314. }
  315. // peek returns but does not consume the next rune in the input.
  316. func (l *lexer) peek() rune {
  317. r := l.next()
  318. l.backup()
  319. return r
  320. }
  321. // backup steps back one rune. Can only be called once per call of next.
  322. func (l *lexer) backup() {
  323. l.pos -= l.width
  324. }
  325. // emit passes an item back to the client.
  326. func (l *lexer) emit(t ItemType) {
  327. l.items <- item{t, l.start, l.input[l.start:l.pos]}
  328. l.start = l.pos
  329. }
  330. // ignore skips over the pending input before this point.
  331. func (l *lexer) ignore() {
  332. l.start = l.pos
  333. }
  334. // accept consumes the next rune if it's from the valid set.
  335. func (l *lexer) accept(valid string) bool {
  336. if strings.ContainsRune(valid, l.next()) {
  337. return true
  338. }
  339. l.backup()
  340. return false
  341. }
  342. // acceptRun consumes a run of runes from the valid set.
  343. func (l *lexer) acceptRun(valid string) {
  344. for strings.ContainsRune(valid, l.next()) {
  345. // consume
  346. }
  347. l.backup()
  348. }
  349. // lineNumber reports which line we're on, based on the position of
  350. // the previous item returned by nextItem. Doing it this way
  351. // means we don't have to worry about peek double counting.
  352. func (l *lexer) lineNumber() int {
  353. return 1 + strings.Count(l.input[:l.lastPos], "\n")
  354. }
  355. // linePosition reports at which character in the current line
  356. // we are on.
  357. func (l *lexer) linePosition() int {
  358. lb := strings.LastIndex(l.input[:l.lastPos], "\n")
  359. if lb == -1 {
  360. return 1 + int(l.lastPos)
  361. }
  362. return 1 + int(l.lastPos) - lb
  363. }
  364. // errorf returns an error token and terminates the scan by passing
  365. // back a nil pointer that will be the next state, terminating l.nextItem.
  366. func (l *lexer) errorf(format string, args ...interface{}) stateFn {
  367. l.items <- item{ItemError, l.start, fmt.Sprintf(format, args...)}
  368. return nil
  369. }
  370. // nextItem returns the next item from the input.
  371. func (l *lexer) nextItem() item {
  372. item := <-l.items
  373. l.lastPos = item.pos
  374. return item
  375. }
  376. // lex creates a new scanner for the input string.
  377. func lex(input string) *lexer {
  378. l := &lexer{
  379. input: input,
  380. items: make(chan item),
  381. }
  382. go l.run()
  383. return l
  384. }
  385. // run runs the state machine for the lexer.
  386. func (l *lexer) run() {
  387. for l.state = lexStatements; l.state != nil; {
  388. l.state = l.state(l)
  389. }
  390. close(l.items)
  391. }
  392. // Release resources used by lexer.
  393. func (l *lexer) close() {
  394. for range l.items {
  395. // Consume.
  396. }
  397. }
// lineComment is the character that starts a line comment. A comment runs up
// to, but does not include, the next end-of-line character or the end of the
// input (see lexLineComment).
const lineComment = "#"
  400. // lexStatements is the top-level state for lexing.
  401. func lexStatements(l *lexer) stateFn {
  402. if l.braceOpen {
  403. return lexInsideBraces
  404. }
  405. if strings.HasPrefix(l.input[l.pos:], lineComment) {
  406. return lexLineComment
  407. }
  408. switch r := l.next(); {
  409. case r == eof:
  410. if l.parenDepth != 0 {
  411. return l.errorf("unclosed left parenthesis")
  412. } else if l.bracketOpen {
  413. return l.errorf("unclosed left bracket")
  414. }
  415. l.emit(ItemEOF)
  416. return nil
  417. case r == ',':
  418. l.emit(ItemComma)
  419. case isSpace(r):
  420. return lexSpace
  421. case r == '*':
  422. l.emit(ItemMUL)
  423. case r == '/':
  424. l.emit(ItemDIV)
  425. case r == '%':
  426. l.emit(ItemMOD)
  427. case r == '+':
  428. l.emit(ItemADD)
  429. case r == '-':
  430. l.emit(ItemSUB)
  431. case r == '^':
  432. l.emit(ItemPOW)
  433. case r == '=':
  434. if t := l.peek(); t == '=' {
  435. l.next()
  436. l.emit(ItemEQL)
  437. } else if t == '~' {
  438. return l.errorf("unexpected character after '=': %q", t)
  439. } else {
  440. l.emit(ItemAssign)
  441. }
  442. case r == '!':
  443. if t := l.next(); t == '=' {
  444. l.emit(ItemNEQ)
  445. } else {
  446. return l.errorf("unexpected character after '!': %q", t)
  447. }
  448. case r == '<':
  449. if t := l.peek(); t == '=' {
  450. l.next()
  451. l.emit(ItemLTE)
  452. } else {
  453. l.emit(ItemLSS)
  454. }
  455. case r == '>':
  456. if t := l.peek(); t == '=' {
  457. l.next()
  458. l.emit(ItemGTE)
  459. } else {
  460. l.emit(ItemGTR)
  461. }
  462. case isDigit(r) || (r == '.' && isDigit(l.peek())):
  463. l.backup()
  464. return lexNumberOrDuration
  465. case r == '"' || r == '\'':
  466. l.stringOpen = r
  467. return lexString
  468. case r == '`':
  469. l.stringOpen = r
  470. return lexRawString
  471. case isAlpha(r) || r == ':':
  472. if !l.bracketOpen {
  473. l.backup()
  474. return lexKeywordOrIdentifier
  475. }
  476. if l.gotColon {
  477. return l.errorf("unexpected colon %q", r)
  478. }
  479. l.emit(ItemColon)
  480. l.gotColon = true
  481. case r == '(':
  482. l.emit(ItemLeftParen)
  483. l.parenDepth++
  484. return lexStatements
  485. case r == ')':
  486. l.emit(ItemRightParen)
  487. l.parenDepth--
  488. if l.parenDepth < 0 {
  489. return l.errorf("unexpected right parenthesis %q", r)
  490. }
  491. return lexStatements
  492. case r == '{':
  493. l.emit(ItemLeftBrace)
  494. l.braceOpen = true
  495. return lexInsideBraces(l)
  496. case r == '[':
  497. if l.bracketOpen {
  498. return l.errorf("unexpected left bracket %q", r)
  499. }
  500. l.gotColon = false
  501. l.emit(ItemLeftBracket)
  502. l.bracketOpen = true
  503. return lexDuration
  504. case r == ']':
  505. if !l.bracketOpen {
  506. return l.errorf("unexpected right bracket %q", r)
  507. }
  508. l.emit(ItemRightBracket)
  509. l.bracketOpen = false
  510. default:
  511. return l.errorf("unexpected character: %q", r)
  512. }
  513. return lexStatements
  514. }
  515. // lexInsideBraces scans the inside of a vector selector. Keywords are ignored and
  516. // scanned as identifiers.
  517. func lexInsideBraces(l *lexer) stateFn {
  518. if strings.HasPrefix(l.input[l.pos:], lineComment) {
  519. return lexLineComment
  520. }
  521. switch r := l.next(); {
  522. case r == eof:
  523. return l.errorf("unexpected end of input inside braces")
  524. case isSpace(r):
  525. return lexSpace
  526. case isAlpha(r):
  527. l.backup()
  528. return lexIdentifier
  529. case r == ',':
  530. l.emit(ItemComma)
  531. case r == '"' || r == '\'':
  532. l.stringOpen = r
  533. return lexString
  534. case r == '`':
  535. l.stringOpen = r
  536. return lexRawString
  537. case r == '=':
  538. if l.next() == '~' {
  539. l.emit(ItemEQLRegex)
  540. break
  541. }
  542. l.backup()
  543. l.emit(ItemEQL)
  544. case r == '!':
  545. switch nr := l.next(); {
  546. case nr == '~':
  547. l.emit(ItemNEQRegex)
  548. case nr == '=':
  549. l.emit(ItemNEQ)
  550. default:
  551. return l.errorf("unexpected character after '!' inside braces: %q", nr)
  552. }
  553. case r == '{':
  554. return l.errorf("unexpected left brace %q", r)
  555. case r == '}':
  556. l.emit(ItemRightBrace)
  557. l.braceOpen = false
  558. if l.seriesDesc {
  559. return lexValueSequence
  560. }
  561. return lexStatements
  562. default:
  563. return l.errorf("unexpected character inside braces: %q", r)
  564. }
  565. return lexInsideBraces
  566. }
  567. // lexValueSequence scans a value sequence of a series description.
  568. func lexValueSequence(l *lexer) stateFn {
  569. switch r := l.next(); {
  570. case r == eof:
  571. return lexStatements
  572. case isSpace(r):
  573. l.emit(ItemSpace)
  574. lexSpace(l)
  575. case r == '+':
  576. l.emit(ItemADD)
  577. case r == '-':
  578. l.emit(ItemSUB)
  579. case r == 'x':
  580. l.emit(ItemTimes)
  581. case r == '_':
  582. l.emit(ItemBlank)
  583. case isDigit(r) || (r == '.' && isDigit(l.peek())):
  584. l.backup()
  585. lexNumber(l)
  586. case isAlpha(r):
  587. l.backup()
  588. // We might lex invalid items here but this will be caught by the parser.
  589. return lexKeywordOrIdentifier
  590. default:
  591. return l.errorf("unexpected character in series sequence: %q", r)
  592. }
  593. return lexValueSequence
  594. }
  595. // lexEscape scans a string escape sequence. The initial escaping character (\)
  596. // has already been seen.
  597. //
  598. // NOTE: This function as well as the helper function digitVal() and associated
  599. // tests have been adapted from the corresponding functions in the "go/scanner"
  600. // package of the Go standard library to work for Prometheus-style strings.
  601. // None of the actual escaping/quoting logic was changed in this function - it
  602. // was only modified to integrate with our lexer.
  603. func lexEscape(l *lexer) {
  604. var n int
  605. var base, max uint32
  606. ch := l.next()
  607. switch ch {
  608. case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', l.stringOpen:
  609. return
  610. case '0', '1', '2', '3', '4', '5', '6', '7':
  611. n, base, max = 3, 8, 255
  612. case 'x':
  613. ch = l.next()
  614. n, base, max = 2, 16, 255
  615. case 'u':
  616. ch = l.next()
  617. n, base, max = 4, 16, unicode.MaxRune
  618. case 'U':
  619. ch = l.next()
  620. n, base, max = 8, 16, unicode.MaxRune
  621. case eof:
  622. l.errorf("escape sequence not terminated")
  623. default:
  624. l.errorf("unknown escape sequence %#U", ch)
  625. }
  626. var x uint32
  627. for n > 0 {
  628. d := uint32(digitVal(ch))
  629. if d >= base {
  630. if ch == eof {
  631. l.errorf("escape sequence not terminated")
  632. }
  633. l.errorf("illegal character %#U in escape sequence", ch)
  634. }
  635. x = x*base + d
  636. ch = l.next()
  637. n--
  638. }
  639. if x > max || 0xD800 <= x && x < 0xE000 {
  640. l.errorf("escape sequence is an invalid Unicode code point")
  641. }
  642. }
  643. // digitVal returns the digit value of a rune or 16 in case the rune does not
  644. // represent a valid digit.
  645. func digitVal(ch rune) int {
  646. switch {
  647. case '0' <= ch && ch <= '9':
  648. return int(ch - '0')
  649. case 'a' <= ch && ch <= 'f':
  650. return int(ch - 'a' + 10)
  651. case 'A' <= ch && ch <= 'F':
  652. return int(ch - 'A' + 10)
  653. }
  654. return 16 // Larger than any legal digit val.
  655. }
  656. // lexString scans a quoted string. The initial quote has already been seen.
  657. func lexString(l *lexer) stateFn {
  658. Loop:
  659. for {
  660. switch l.next() {
  661. case '\\':
  662. lexEscape(l)
  663. case utf8.RuneError:
  664. return l.errorf("invalid UTF-8 rune")
  665. case eof, '\n':
  666. return l.errorf("unterminated quoted string")
  667. case l.stringOpen:
  668. break Loop
  669. }
  670. }
  671. l.emit(ItemString)
  672. return lexStatements
  673. }
  674. // lexRawString scans a raw quoted string. The initial quote has already been seen.
  675. func lexRawString(l *lexer) stateFn {
  676. Loop:
  677. for {
  678. switch l.next() {
  679. case utf8.RuneError:
  680. return l.errorf("invalid UTF-8 rune")
  681. case eof:
  682. return l.errorf("unterminated raw string")
  683. case l.stringOpen:
  684. break Loop
  685. }
  686. }
  687. l.emit(ItemString)
  688. return lexStatements
  689. }
  690. // lexSpace scans a run of space characters. One space has already been seen.
  691. func lexSpace(l *lexer) stateFn {
  692. for isSpace(l.peek()) {
  693. l.next()
  694. }
  695. l.ignore()
  696. return lexStatements
  697. }
  698. // lexLineComment scans a line comment. Left comment marker is known to be present.
  699. func lexLineComment(l *lexer) stateFn {
  700. l.pos += Pos(len(lineComment))
  701. for r := l.next(); !isEndOfLine(r) && r != eof; {
  702. r = l.next()
  703. }
  704. l.backup()
  705. l.emit(ItemComment)
  706. return lexStatements
  707. }
  708. func lexDuration(l *lexer) stateFn {
  709. if l.scanNumber() {
  710. return l.errorf("missing unit character in duration")
  711. }
  712. // Next two chars must be a valid unit and a non-alphanumeric.
  713. if l.accept("smhdwy") {
  714. if isAlphaNumeric(l.next()) {
  715. return l.errorf("bad duration syntax: %q", l.input[l.start:l.pos])
  716. }
  717. l.backup()
  718. l.emit(ItemDuration)
  719. return lexStatements
  720. }
  721. return l.errorf("bad duration syntax: %q", l.input[l.start:l.pos])
  722. }
  723. // lexNumber scans a number: decimal, hex, oct or float.
  724. func lexNumber(l *lexer) stateFn {
  725. if !l.scanNumber() {
  726. return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
  727. }
  728. l.emit(ItemNumber)
  729. return lexStatements
  730. }
  731. // lexNumberOrDuration scans a number or a duration item.
  732. func lexNumberOrDuration(l *lexer) stateFn {
  733. if l.scanNumber() {
  734. l.emit(ItemNumber)
  735. return lexStatements
  736. }
  737. // Next two chars must be a valid unit and a non-alphanumeric.
  738. if l.accept("smhdwy") {
  739. if isAlphaNumeric(l.next()) {
  740. return l.errorf("bad number or duration syntax: %q", l.input[l.start:l.pos])
  741. }
  742. l.backup()
  743. l.emit(ItemDuration)
  744. return lexStatements
  745. }
  746. return l.errorf("bad number or duration syntax: %q", l.input[l.start:l.pos])
  747. }
  748. // scanNumber scans numbers of different formats. The scanned item is
  749. // not necessarily a valid number. This case is caught by the parser.
  750. func (l *lexer) scanNumber() bool {
  751. digits := "0123456789"
  752. // Disallow hexadecimal in series descriptions as the syntax is ambiguous.
  753. if !l.seriesDesc && l.accept("0") && l.accept("xX") {
  754. digits = "0123456789abcdefABCDEF"
  755. }
  756. l.acceptRun(digits)
  757. if l.accept(".") {
  758. l.acceptRun(digits)
  759. }
  760. if l.accept("eE") {
  761. l.accept("+-")
  762. l.acceptRun("0123456789")
  763. }
  764. // Next thing must not be alphanumeric unless it's the times token
  765. // for series repetitions.
  766. if r := l.peek(); (l.seriesDesc && r == 'x') || !isAlphaNumeric(r) {
  767. return true
  768. }
  769. return false
  770. }
  771. // lexIdentifier scans an alphanumeric identifier. The next character
  772. // is known to be a letter.
  773. func lexIdentifier(l *lexer) stateFn {
  774. for isAlphaNumeric(l.next()) {
  775. // absorb
  776. }
  777. l.backup()
  778. l.emit(ItemIdentifier)
  779. return lexStatements
  780. }
  781. // lexKeywordOrIdentifier scans an alphanumeric identifier which may contain
  782. // a colon rune. If the identifier is a keyword the respective keyword item
  783. // is scanned.
  784. func lexKeywordOrIdentifier(l *lexer) stateFn {
  785. Loop:
  786. for {
  787. switch r := l.next(); {
  788. case isAlphaNumeric(r) || r == ':':
  789. // absorb.
  790. default:
  791. l.backup()
  792. word := l.input[l.start:l.pos]
  793. if kw, ok := key[strings.ToLower(word)]; ok {
  794. l.emit(kw)
  795. } else if !strings.Contains(word, ":") {
  796. l.emit(ItemIdentifier)
  797. } else {
  798. l.emit(ItemMetricIdentifier)
  799. }
  800. break Loop
  801. }
  802. }
  803. if l.seriesDesc && l.peek() != '{' {
  804. return lexValueSequence
  805. }
  806. return lexStatements
  807. }
  808. func isSpace(r rune) bool {
  809. return r == ' ' || r == '\t' || r == '\n' || r == '\r'
  810. }
  811. // isEndOfLine reports whether r is an end-of-line character.
  812. func isEndOfLine(r rune) bool {
  813. return r == '\r' || r == '\n'
  814. }
  815. // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
  816. func isAlphaNumeric(r rune) bool {
  817. return isAlpha(r) || isDigit(r)
  818. }
  819. // isDigit reports whether r is a digit. Note: we cannot use unicode.IsDigit()
  820. // instead because that also classifies non-Latin digits as digits. See
  821. // https://github.com/prometheus/prometheus/issues/939.
  822. func isDigit(r rune) bool {
  823. return '0' <= r && r <= '9'
  824. }
  825. // isAlpha reports whether r is an alphabetic or underscore.
  826. func isAlpha(r rune) bool {
  827. return r == '_' || ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z')
  828. }
  829. // isLabel reports whether the string can be used as label.
  830. func isLabel(s string) bool {
  831. if len(s) == 0 || !isAlpha(rune(s[0])) {
  832. return false
  833. }
  834. for _, c := range s[1:] {
  835. if !isAlphaNumeric(c) {
  836. return false
  837. }
  838. }
  839. return true
  840. }