| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130 |
- package rulesfn
- import (
- "fmt"
- "net"
- "net/url"
- "strings"
- smithyhttp "github.com/aws/smithy-go/transport/http"
- )
- // IsValidHostLabel returns if the input is a single valid [RFC 1123] host
- // label. If allowSubDomains is true, will allow validation to include nested
- // host labels. Returns false if the input is not a valid host label. If errors
- // occur they will be added to the provided [ErrorCollector].
- //
- // [RFC 1123]: https://www.ietf.org/rfc/rfc1123.txt
- func IsValidHostLabel(input string, allowSubDomains bool) bool {
- var labels []string
- if allowSubDomains {
- labels = strings.Split(input, ".")
- } else {
- labels = []string{input}
- }
- for _, label := range labels {
- if !smithyhttp.ValidHostLabel(label) {
- return false
- }
- }
- return true
- }
- // ParseURL returns a [URL] if the provided string could be parsed. Returns nil
- // if the string could not be parsed. Any parsing error will be added to the
- // [ErrorCollector].
- //
- // If the input URL string contains an IP6 address with a zone index. The
- // returned [builtin.URL.Authority] value will contain the percent escaped (%)
- // zone index separator.
- func ParseURL(input string) *URL {
- u, err := url.Parse(input)
- if err != nil {
- return nil
- }
- if u.RawQuery != "" {
- return nil
- }
- if u.Scheme != "http" && u.Scheme != "https" {
- return nil
- }
- normalizedPath := u.Path
- if !strings.HasPrefix(normalizedPath, "/") {
- normalizedPath = "/" + normalizedPath
- }
- if !strings.HasSuffix(normalizedPath, "/") {
- normalizedPath = normalizedPath + "/"
- }
- // IP6 hosts may have zone indexes that need to be escaped to be valid in a
- // URI. The Go URL parser will unescape the `%25` into `%`. This needs to
- // be reverted since the returned URL will be used in string builders.
- authority := strings.ReplaceAll(u.Host, "%", "%25")
- return &URL{
- Scheme: u.Scheme,
- Authority: authority,
- Path: u.Path,
- NormalizedPath: normalizedPath,
- IsIp: net.ParseIP(hostnameWithoutZone(u)) != nil,
- }
- }
// URL describes the individual parts of a parsed URL, as produced by
// [ParseURL].
type URL struct {
	// Scheme is the URL scheme.
	// See https://www.rfc-editor.org/rfc/rfc3986#section-3.1
	Scheme string

	// Authority is the host portion of the URL; ParseURL stores it with any
	// "%" re-escaped as "%25".
	// See https://www.rfc-editor.org/rfc/rfc3986#section-3.2
	Authority string

	// Path is the path exactly as parsed.
	// See https://www.rfc-editor.org/rfc/rfc3986#section-3.3
	Path string

	// NormalizedPath is Path adjusted by ParseURL to start and end with "/".
	// See https://www.rfc-editor.org/rfc/rfc3986#section-6.2.3
	NormalizedPath string

	// IsIp is set by ParseURL to true when the host parses as an IP address.
	IsIp bool
}
// URIEncode returns a percent-encoded [RFC3986 section 2.1] version of the
// input string.
//
// Every byte that is not an RFC 3986 unreserved character (ASCII letters,
// digits, '-', '_', '.', '~') is replaced by "%" followed by exactly two
// uppercase hexadecimal digits. Encoding is done per byte, so a multi-byte
// UTF-8 character yields one escape sequence per byte.
//
// [RFC3986 section 2.1]: https://www.rfc-editor.org/rfc/rfc3986#section-2.1
func URIEncode(input string) string {
	var output strings.Builder
	// The result is at least as long as the input.
	output.Grow(len(input))

	for _, c := range []byte(input) {
		if validPercentEncodedChar(c) {
			output.WriteByte(c)
			continue
		}

		// %02X zero-pads to two digits; a bare %X would emit a single digit
		// for bytes below 0x10 (e.g. "%A" instead of "%0A"), which is not a
		// valid percent-encoded octet.
		fmt.Fprintf(&output, "%%%02X", c)
	}

	return output.String()
}

// validPercentEncodedChar reports whether c may appear unescaped in the
// output of URIEncode, i.e. whether it is an ASCII letter, an ASCII digit, or
// one of '-', '_', '.', '~'.
func validPercentEncodedChar(c byte) bool {
	return (c >= 'a' && c <= 'z') ||
		(c >= 'A' && c <= 'Z') ||
		(c >= '0' && c <= '9') ||
		c == '-' || c == '_' || c == '.' || c == '~'
}
// hostnameWithoutZone returns u.Hostname() with any IPv6 zone ID removed, so
// that net.ParseIP can still recognize IPv6 addresses that carry a zone ID.
//
// FUTURE(10/2023): netip.ParseAddr handles this natively but we can't take
// that package as a dependency yet due to our min go version (1.15, netip
// starts in 1.18). When we align with go runtime deprecation policy in
// 10/2023, we can remove this.
func hostnameWithoutZone(u *url.URL) string {
	host := u.Hostname()

	// Roughly mirrors the unexported splitHostZone in package net: the zone
	// begins at the last "%". The zone itself is not needed, so it is simply
	// discarded.
	zoneStart := strings.LastIndex(host, "%")
	if zoneStart < 0 {
		return host
	}
	return host[:zoneStart]
}
|