uri.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. package rulesfn
  2. import (
  3. "fmt"
  4. "net"
  5. "net/url"
  6. "strings"
  7. smithyhttp "github.com/aws/smithy-go/transport/http"
  8. )
  9. // IsValidHostLabel returns if the input is a single valid [RFC 1123] host
  10. // label. If allowSubDomains is true, will allow validation to include nested
  11. // host labels. Returns false if the input is not a valid host label. If errors
  12. // occur they will be added to the provided [ErrorCollector].
  13. //
  14. // [RFC 1123]: https://www.ietf.org/rfc/rfc1123.txt
  15. func IsValidHostLabel(input string, allowSubDomains bool) bool {
  16. var labels []string
  17. if allowSubDomains {
  18. labels = strings.Split(input, ".")
  19. } else {
  20. labels = []string{input}
  21. }
  22. for _, label := range labels {
  23. if !smithyhttp.ValidHostLabel(label) {
  24. return false
  25. }
  26. }
  27. return true
  28. }
  29. // ParseURL returns a [URL] if the provided string could be parsed. Returns nil
  30. // if the string could not be parsed. Any parsing error will be added to the
  31. // [ErrorCollector].
  32. //
  33. // If the input URL string contains an IP6 address with a zone index. The
  34. // returned [builtin.URL.Authority] value will contain the percent escaped (%)
  35. // zone index separator.
  36. func ParseURL(input string) *URL {
  37. u, err := url.Parse(input)
  38. if err != nil {
  39. return nil
  40. }
  41. if u.RawQuery != "" {
  42. return nil
  43. }
  44. if u.Scheme != "http" && u.Scheme != "https" {
  45. return nil
  46. }
  47. normalizedPath := u.Path
  48. if !strings.HasPrefix(normalizedPath, "/") {
  49. normalizedPath = "/" + normalizedPath
  50. }
  51. if !strings.HasSuffix(normalizedPath, "/") {
  52. normalizedPath = normalizedPath + "/"
  53. }
  54. // IP6 hosts may have zone indexes that need to be escaped to be valid in a
  55. // URI. The Go URL parser will unescape the `%25` into `%`. This needs to
  56. // be reverted since the returned URL will be used in string builders.
  57. authority := strings.ReplaceAll(u.Host, "%", "%25")
  58. return &URL{
  59. Scheme: u.Scheme,
  60. Authority: authority,
  61. Path: u.Path,
  62. NormalizedPath: normalizedPath,
  63. IsIp: net.ParseIP(hostnameWithoutZone(u)) != nil,
  64. }
  65. }
  66. // URL provides the structure describing the parts of a parsed URL returned by
  67. // [ParseURL].
  68. type URL struct {
  69. Scheme string // https://www.rfc-editor.org/rfc/rfc3986#section-3.1
  70. Authority string // https://www.rfc-editor.org/rfc/rfc3986#section-3.2
  71. Path string // https://www.rfc-editor.org/rfc/rfc3986#section-3.3
  72. NormalizedPath string // https://www.rfc-editor.org/rfc/rfc3986#section-6.2.3
  73. IsIp bool
  74. }
  75. // URIEncode returns an percent-encoded [RFC3986 section 2.1] version of the
  76. // input string.
  77. //
  78. // [RFC3986 section 2.1]: https://www.rfc-editor.org/rfc/rfc3986#section-2.1
  79. func URIEncode(input string) string {
  80. var output strings.Builder
  81. for _, c := range []byte(input) {
  82. if validPercentEncodedChar(c) {
  83. output.WriteByte(c)
  84. continue
  85. }
  86. fmt.Fprintf(&output, "%%%X", c)
  87. }
  88. return output.String()
  89. }
  90. func validPercentEncodedChar(c byte) bool {
  91. return (c >= 'a' && c <= 'z') ||
  92. (c >= 'A' && c <= 'Z') ||
  93. (c >= '0' && c <= '9') ||
  94. c == '-' || c == '_' || c == '.' || c == '~'
  95. }
  96. // hostname implements u.Hostname() but strips the ipv6 zone ID (if present)
  97. // such that net.ParseIP can still recognize IPv6 addresses with zone IDs.
  98. //
  99. // FUTURE(10/2023): netip.ParseAddr handles this natively but we can't take
  100. // that package as a dependency yet due to our min go version (1.15, netip
  101. // starts in 1.18). When we align with go runtime deprecation policy in
  102. // 10/2023, we can remove this.
  103. func hostnameWithoutZone(u *url.URL) string {
  104. full := u.Hostname()
  105. // this more or less mimics the internals of net/ (see unexported
  106. // splitHostZone in that source) but throws the zone away because we don't
  107. // need it
  108. if i := strings.LastIndex(full, "%"); i > -1 {
  109. return full[:i]
  110. }
  111. return full
  112. }