obfuscate.go 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. // Unless explicitly stated otherwise all files in this repository are licensed
  2. // under the Apache License Version 2.0.
  3. // This product includes software developed at Datadog (https://www.datadoghq.com/).
  4. // Copyright 2016-present Datadog, Inc.
  5. // Package obfuscate implements quantizing and obfuscating of tags and resources for
  6. // a set of spans matching certain criteria.
  7. //
  8. // This module is used in the Datadog Agent, the Go tracing client (dd-trace-go) and in the
  9. // OpenTelemetry Collector Datadog exporter. End-user behavior is stable, but there are no
  10. // stability guarantees on its public Go API. Nonetheless, if editing try to avoid breaking
  11. // API changes if possible and double check the API usage on all module dependents.
  12. package obfuscate
  13. import (
  14. "bytes"
  15. "sync/atomic"
  16. "github.com/DataDog/datadog-go/statsd"
  17. )
  18. //go:generate easyjson -no_std_marshalers $GOFILE
  19. // Obfuscator quantizes and obfuscates spans. The obfuscator is not safe for
  20. // concurrent use.
  21. type Obfuscator struct {
  22. opts *Config
  23. es *jsonObfuscator // nil if disabled
  24. mongo *jsonObfuscator // nil if disabled
  25. sqlExecPlan *jsonObfuscator // nil if disabled
  26. sqlExecPlanNormalize *jsonObfuscator // nil if disabled
  27. // sqlLiteralEscapes reports whether we should treat escape characters literally or as escape characters.
  28. // A non-zero value means 'yes'. Different SQL engines behave in different ways and the tokenizer needs
  29. // to be generic.
  30. // Not safe for concurrent use.
  31. sqlLiteralEscapes int32
  32. // queryCache keeps a cache of already obfuscated queries.
  33. queryCache *measuredCache
  34. log Logger
  35. }
  36. // Logger is able to log certain log messages.
  37. type Logger interface {
  38. // Debugf logs the given message using the given format.
  39. Debugf(format string, params ...interface{})
  40. }
  41. type noopLogger struct{}
  42. func (noopLogger) Debugf(_ string, _ ...interface{}) {}
  43. // setSQLLiteralEscapes sets whether or not escape characters should be treated literally by the SQL obfuscator.
  44. func (o *Obfuscator) setSQLLiteralEscapes(ok bool) {
  45. if ok {
  46. atomic.StoreInt32(&o.sqlLiteralEscapes, 1)
  47. } else {
  48. atomic.StoreInt32(&o.sqlLiteralEscapes, 0)
  49. }
  50. }
  51. // useSQLLiteralEscapes reports whether escape characters will be treated literally by the SQL obfuscator.
  52. // Some SQL engines require it and others don't. It will be detected as SQL queries are being obfuscated
  53. // through calls to ObfuscateSQLString and automatically set for future.
  54. func (o *Obfuscator) useSQLLiteralEscapes() bool {
  55. return atomic.LoadInt32(&o.sqlLiteralEscapes) == 1
  56. }
  57. // Config holds the configuration for obfuscating sensitive data for various span types.
  58. type Config struct {
  59. // SQL holds the obfuscation configuration for SQL queries.
  60. SQL SQLConfig
  61. // ES holds the obfuscation configuration for ElasticSearch bodies.
  62. ES JSONConfig
  63. // Mongo holds the obfuscation configuration for MongoDB queries.
  64. Mongo JSONConfig
  65. // SQLExecPlan holds the obfuscation configuration for SQL Exec Plans. This is strictly for safety related obfuscation,
  66. // not normalization. Normalization of exec plans is configured in SQLExecPlanNormalize.
  67. SQLExecPlan JSONConfig
  68. // SQLExecPlanNormalize holds the normalization configuration for SQL Exec Plans.
  69. SQLExecPlanNormalize JSONConfig
  70. // HTTP holds the obfuscation settings for HTTP URLs.
  71. HTTP HTTPConfig
  72. // Statsd specifies the statsd client to use for reporting metrics.
  73. Statsd StatsClient
  74. // Logger specifies the logger to use when outputting messages.
  75. // If unset, no logs will be outputted.
  76. Logger Logger
  77. }
  78. // StatsClient implementations are able to emit stats.
  79. type StatsClient interface {
  80. // Gauge reports a gauge stat with the given name, value, tags and rate.
  81. Gauge(name string, value float64, tags []string, rate float64) error
  82. }
  83. // SQLConfig holds the config for obfuscating SQL.
  84. // easyjson:json
  85. type SQLConfig struct {
  86. // TableNames specifies whether the obfuscator should also extract the table names that a query addresses,
  87. // in addition to obfuscating.
  88. TableNames bool
  89. // ReplaceDigits specifies whether digits in table names and identifiers should be obfuscated.
  90. ReplaceDigits bool `json:"replace_digits"`
  91. // KeepSQLAlias reports whether SQL aliases ("AS") should be truncated.
  92. KeepSQLAlias bool
  93. // DollarQuotedFunc reports whether to treat "$func$" delimited dollar-quoted strings
  94. // differently and not obfuscate them as a string. To read more about dollar quoted
  95. // strings see:
  96. //
  97. // https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING
  98. DollarQuotedFunc bool
  99. // Cache reports whether the obfuscator should use a LRU look-up cache for SQL obfuscations.
  100. Cache bool
  101. }
  102. // HTTPConfig holds the configuration settings for HTTP obfuscation.
  103. type HTTPConfig struct {
  104. // RemoveQueryStrings determines query strings to be removed from HTTP URLs.
  105. RemoveQueryString bool
  106. // RemovePathDigits determines digits in path segments to be obfuscated.
  107. RemovePathDigits bool
  108. }
  109. // JSONConfig holds the obfuscation configuration for sensitive
  110. // data found in JSON objects.
  111. type JSONConfig struct {
  112. // Enabled will specify whether obfuscation should be enabled.
  113. Enabled bool
  114. // KeepValues will specify a set of keys for which their values will
  115. // not be obfuscated.
  116. KeepValues []string
  117. // ObfuscateSQLValues will specify a set of keys for which their values
  118. // will be passed through SQL obfuscation
  119. ObfuscateSQLValues []string
  120. }
  121. // NewObfuscator creates a new obfuscator
  122. func NewObfuscator(cfg Config) *Obfuscator {
  123. if cfg.Logger == nil {
  124. cfg.Logger = noopLogger{}
  125. }
  126. o := Obfuscator{
  127. opts: &cfg,
  128. queryCache: newMeasuredCache(cacheOptions{On: cfg.SQL.Cache, Statsd: cfg.Statsd}),
  129. }
  130. if cfg.ES.Enabled {
  131. o.es = newJSONObfuscator(&cfg.ES, &o)
  132. }
  133. if cfg.Mongo.Enabled {
  134. o.mongo = newJSONObfuscator(&cfg.Mongo, &o)
  135. }
  136. if cfg.SQLExecPlan.Enabled {
  137. o.sqlExecPlan = newJSONObfuscator(&cfg.SQLExecPlan, &o)
  138. }
  139. if cfg.SQLExecPlanNormalize.Enabled {
  140. o.sqlExecPlanNormalize = newJSONObfuscator(&cfg.SQLExecPlanNormalize, &o)
  141. }
  142. if cfg.Statsd == nil {
  143. cfg.Statsd = &statsd.NoOpClient{}
  144. }
  145. return &o
  146. }
  147. // Stop cleans up after a finished Obfuscator.
  148. func (o *Obfuscator) Stop() {
  149. o.queryCache.Close()
  150. }
  151. // compactWhitespaces compacts all whitespaces in t.
  152. func compactWhitespaces(t string) string {
  153. n := len(t)
  154. r := make([]byte, n)
  155. spaceCode := uint8(32)
  156. isWhitespace := func(char uint8) bool { return char == spaceCode }
  157. nr := 0
  158. offset := 0
  159. for i := 0; i < n; i++ {
  160. if isWhitespace(t[i]) {
  161. copy(r[nr:], t[nr+offset:i])
  162. r[i-offset] = spaceCode
  163. nr = i + 1 - offset
  164. for j := i + 1; j < n; j++ {
  165. if !isWhitespace(t[j]) {
  166. offset += j - i - 1
  167. i = j
  168. break
  169. } else if j == n-1 {
  170. offset += j - i
  171. i = j
  172. break
  173. }
  174. }
  175. }
  176. }
  177. copy(r[nr:], t[nr+offset:n])
  178. r = r[:n-offset]
  179. return string(bytes.Trim(r, " "))
  180. }
  181. // replaceDigits replaces consecutive sequences of digits with '?',
  182. // example: "jobs_2020_1597876964" --> "jobs_?_?"
  183. func replaceDigits(buffer []byte) []byte {
  184. scanningDigit := false
  185. filtered := buffer[:0]
  186. for _, b := range buffer {
  187. // digits are encoded as 1 byte in utf8
  188. if isDigit(rune(b)) {
  189. if scanningDigit {
  190. continue
  191. }
  192. scanningDigit = true
  193. filtered = append(filtered, byte('?'))
  194. continue
  195. }
  196. scanningDigit = false
  197. filtered = append(filtered, b)
  198. }
  199. return filtered
  200. }