redis_tokenizer.go 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.
  5. package obfuscate
  6. import (
  7. "bytes"
  8. "strings"
  9. )
  10. // redisTokenType specifies the token type returned by the tokenizer.
  11. type redisTokenType int
  12. const (
  13. // redisTokenCommand is a command token. For compound tokens, it is
  14. // only the first part up to a space.
  15. redisTokenCommand redisTokenType = iota
  16. // redisTokenArgument is an argument token.
  17. redisTokenArgument
  18. )
  19. // String implements fmt.Stringer.
  20. func (t redisTokenType) String() string {
  21. return map[redisTokenType]string{
  22. redisTokenCommand: "command",
  23. redisTokenArgument: "argument",
  24. }[t]
  25. }
  26. // redisTokenizer tokenizes a Redis command string. The string can be on
  27. // multiple lines. The tokenizer is capable of parsing quoted strings and escape
  28. // sequences inside them.
  29. type redisTokenizer struct {
  30. data []byte
  31. ch byte
  32. off int
  33. done bool
  34. state redisParseState
  35. }
  36. // redisParseState specifies the current state of the tokenizer.
  37. type redisParseState int
  38. const (
  39. // redisStateCommand specifies that we are about to parse a command.
  40. // It is usually the state at the beginning of the scan or after a
  41. // new line.
  42. redisStateCommand redisParseState = iota
  43. // redisStateArgument specifies that we are about to parse an argument
  44. // to a command or the rest of the tokens in a compound command.
  45. redisStateArgument
  46. )
  47. // newRedisTokenizer returns a new tokenizer for the given data.
  48. func newRedisTokenizer(data []byte) *redisTokenizer {
  49. return &redisTokenizer{
  50. data: bytes.TrimSpace(data),
  51. off: -1,
  52. state: redisStateCommand,
  53. }
  54. }
  55. // scan returns the next token, it's type and a bool. The boolean specifies if
  56. // the returned token was the last one.
  57. func (t *redisTokenizer) scan() (tok string, typ redisTokenType, done bool) {
  58. switch t.state {
  59. case redisStateCommand:
  60. return t.scanCommand()
  61. default:
  62. return t.scanArg()
  63. }
  64. }
  65. // next advances the scanner to the next character.
  66. func (t *redisTokenizer) next() {
  67. t.off++
  68. if t.off <= len(t.data)-1 {
  69. t.ch = t.data[t.off]
  70. return
  71. }
  72. t.done = true
  73. }
  74. // scanCommand scans a command from the buffer.
  75. func (t *redisTokenizer) scanCommand() (tok string, typ redisTokenType, done bool) {
  76. var (
  77. str strings.Builder
  78. started bool
  79. )
  80. for {
  81. t.next()
  82. if t.done {
  83. return str.String(), typ, t.done
  84. }
  85. switch t.ch {
  86. case ' ':
  87. if !started {
  88. // skip spaces preceding token
  89. t.skipSpace()
  90. break
  91. }
  92. // done scanning command
  93. t.state = redisStateArgument
  94. t.skipSpace()
  95. return str.String(), redisTokenCommand, t.done
  96. case '\n':
  97. return str.String(), redisTokenCommand, t.done
  98. default:
  99. str.WriteByte(t.ch)
  100. }
  101. started = true
  102. }
  103. }
  104. // scanArg scans an argument from the buffer.
  105. func (t *redisTokenizer) scanArg() (tok string, typ redisTokenType, done bool) {
  106. var (
  107. str strings.Builder
  108. quoted bool // in quoted string
  109. escape bool // escape sequence
  110. )
  111. for {
  112. t.next()
  113. if t.done {
  114. return str.String(), redisTokenArgument, t.done
  115. }
  116. switch t.ch {
  117. case '\\':
  118. str.WriteByte('\\')
  119. if !escape {
  120. // next character could be escaped
  121. escape = true
  122. continue
  123. }
  124. case '\n':
  125. if !quoted {
  126. // last argument, new command follows
  127. t.state = redisStateCommand
  128. return str.String(), redisTokenArgument, t.done
  129. }
  130. str.WriteByte('\n')
  131. case '"':
  132. str.WriteByte('"')
  133. if !escape {
  134. // this quote wasn't escaped, toggle quoted mode
  135. quoted = !quoted
  136. }
  137. case ' ':
  138. if !quoted {
  139. t.skipSpace()
  140. return str.String(), redisTokenArgument, t.done
  141. }
  142. str.WriteByte(' ')
  143. default:
  144. str.WriteByte(t.ch)
  145. }
  146. escape = false
  147. }
  148. }
  149. // unread is the reverse of next, unreading a character.
  150. func (t *redisTokenizer) unread() {
  151. if t.off < 1 {
  152. return
  153. }
  154. t.off--
  155. t.ch = t.data[t.off]
  156. }
  157. // skipSpace moves the cursor forward until it meets the last space
  158. // in a sequence of contiguous spaces.
  159. func (t *redisTokenizer) skipSpace() {
  160. for t.ch == ' ' || t.ch == '\t' || t.ch == '\r' && !t.done {
  161. t.next()
  162. }
  163. if t.ch == '\n' {
  164. // next token is a command
  165. t.state = redisStateCommand
  166. } else {
  167. // don't steal the first non-space character
  168. t.unread()
  169. }
  170. }