quote.go 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. // Copyright 2015 The Prometheus Authors
  2. // Licensed under the Apache License, Version 2.0 (the "License");
  3. // you may not use this file except in compliance with the License.
  4. // You may obtain a copy of the License at
  5. //
  6. // http://www.apache.org/licenses/LICENSE-2.0
  7. //
  8. // Unless required by applicable law or agreed to in writing, software
  9. // distributed under the License is distributed on an "AS IS" BASIS,
  10. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. // See the License for the specific language governing permissions and
  12. // limitations under the License.
  13. package strutil
  14. import (
  15. "errors"
  16. "unicode/utf8"
  17. )
// ErrSyntax indicates that a value does not have the right syntax for the
// target type. Unquote and unquoteChar return it for malformed string
// literals and invalid escape sequences.
var ErrSyntax = errors.New("invalid syntax")
  20. // Unquote interprets s as a single-quoted, double-quoted, or backquoted
  21. // Prometheus query language string literal, returning the string value that s
  22. // quotes.
  23. //
  24. // NOTE: This function as well as the necessary helper functions below
  25. // (unquoteChar, contains, unhex) and associated tests have been adapted from
  26. // the corresponding functions in the "strconv" package of the Go standard
  27. // library to work for Prometheus-style strings. Go's special-casing for single
  28. // quotes was removed and single quoted strings are now treated the same as
  29. // double quoted ones.
  30. func Unquote(s string) (t string, err error) {
  31. n := len(s)
  32. if n < 2 {
  33. return "", ErrSyntax
  34. }
  35. quote := s[0]
  36. if quote != s[n-1] {
  37. return "", ErrSyntax
  38. }
  39. s = s[1 : n-1]
  40. if quote == '`' {
  41. if contains(s, '`') {
  42. return "", ErrSyntax
  43. }
  44. return s, nil
  45. }
  46. if quote != '"' && quote != '\'' {
  47. return "", ErrSyntax
  48. }
  49. if contains(s, '\n') {
  50. return "", ErrSyntax
  51. }
  52. // Is it trivial? Avoid allocation.
  53. if !contains(s, '\\') && !contains(s, quote) {
  54. return s, nil
  55. }
  56. var runeTmp [utf8.UTFMax]byte
  57. buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
  58. for len(s) > 0 {
  59. c, multibyte, ss, err := unquoteChar(s, quote)
  60. if err != nil {
  61. return "", err
  62. }
  63. s = ss
  64. if c < utf8.RuneSelf || !multibyte {
  65. buf = append(buf, byte(c))
  66. } else {
  67. n := utf8.EncodeRune(runeTmp[:], c)
  68. buf = append(buf, runeTmp[:n]...)
  69. }
  70. }
  71. return string(buf), nil
  72. }
  73. // unquoteChar decodes the first character or byte in the escaped string
  74. // or character literal represented by the string s.
  75. // It returns four values:
  76. //
  77. // 1) value, the decoded Unicode code point or byte value;
  78. // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
  79. // 3) tail, the remainder of the string after the character; and
  80. // 4) an error that will be nil if the character is syntactically valid.
  81. //
  82. // The second argument, quote, specifies the type of literal being parsed
  83. // and therefore which escaped quote character is permitted.
  84. // If set to a single quote, it permits the sequence \' and disallows unescaped '.
  85. // If set to a double quote, it permits \" and disallows unescaped ".
  86. // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
  87. func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
  88. // easy cases
  89. switch c := s[0]; {
  90. case c == quote && (quote == '\'' || quote == '"'):
  91. err = ErrSyntax
  92. return
  93. case c >= utf8.RuneSelf:
  94. r, size := utf8.DecodeRuneInString(s)
  95. return r, true, s[size:], nil
  96. case c != '\\':
  97. return rune(s[0]), false, s[1:], nil
  98. }
  99. // Hard case: c is backslash.
  100. if len(s) <= 1 {
  101. err = ErrSyntax
  102. return
  103. }
  104. c := s[1]
  105. s = s[2:]
  106. switch c {
  107. case 'a':
  108. value = '\a'
  109. case 'b':
  110. value = '\b'
  111. case 'f':
  112. value = '\f'
  113. case 'n':
  114. value = '\n'
  115. case 'r':
  116. value = '\r'
  117. case 't':
  118. value = '\t'
  119. case 'v':
  120. value = '\v'
  121. case 'x', 'u', 'U':
  122. n := 0
  123. switch c {
  124. case 'x':
  125. n = 2
  126. case 'u':
  127. n = 4
  128. case 'U':
  129. n = 8
  130. }
  131. var v rune
  132. if len(s) < n {
  133. err = ErrSyntax
  134. return
  135. }
  136. for j := 0; j < n; j++ {
  137. x, ok := unhex(s[j])
  138. if !ok {
  139. err = ErrSyntax
  140. return
  141. }
  142. v = v<<4 | x
  143. }
  144. s = s[n:]
  145. if c == 'x' {
  146. // Single-byte string, possibly not UTF-8.
  147. value = v
  148. break
  149. }
  150. if v > utf8.MaxRune {
  151. err = ErrSyntax
  152. return
  153. }
  154. value = v
  155. multibyte = true
  156. case '0', '1', '2', '3', '4', '5', '6', '7':
  157. v := rune(c) - '0'
  158. if len(s) < 2 {
  159. err = ErrSyntax
  160. return
  161. }
  162. for j := 0; j < 2; j++ { // One digit already; two more.
  163. x := rune(s[j]) - '0'
  164. if x < 0 || x > 7 {
  165. err = ErrSyntax
  166. return
  167. }
  168. v = (v << 3) | x
  169. }
  170. s = s[2:]
  171. if v > 255 {
  172. err = ErrSyntax
  173. return
  174. }
  175. value = v
  176. case '\\':
  177. value = '\\'
  178. case '\'', '"':
  179. if c != quote {
  180. err = ErrSyntax
  181. return
  182. }
  183. value = rune(c)
  184. default:
  185. err = ErrSyntax
  186. return
  187. }
  188. tail = s
  189. return
  190. }
  191. // contains reports whether the string contains the byte c.
  192. func contains(s string, c byte) bool {
  193. for i := 0; i < len(s); i++ {
  194. if s[i] == c {
  195. return true
  196. }
  197. }
  198. return false
  199. }
  200. func unhex(b byte) (v rune, ok bool) {
  201. c := rune(b)
  202. switch {
  203. case '0' <= c && c <= '9':
  204. return c - '0', true
  205. case 'a' <= c && c <= 'f':
  206. return c - 'a' + 10, true
  207. case 'A' <= c && c <= 'F':
  208. return c - 'A' + 10, true
  209. }
  210. return
  211. }