normalize_url.go 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. package internal
  2. import (
  3. "net/url"
  4. "regexp"
  5. "strings"
  6. )
  7. const (
  8. defaultHttpPort = ":80"
  9. defaultHttpsPort = ":443"
  10. )
  11. // Regular expressions used by the normalizations
  12. var rxPort = regexp.MustCompile(`(:\d+)/?$`)
  13. var rxDupSlashes = regexp.MustCompile(`/{2,}`)
  14. // NormalizeURL will normalize the specified URL
  15. // This was added to replace a previous call to the no longer maintained purell library:
  16. // The call that was used looked like the following:
  17. // url.Parse(purell.NormalizeURL(parsed, purell.FlagsSafe|purell.FlagRemoveDuplicateSlashes))
  18. //
  19. // To explain all that was included in the call above, purell.FlagsSafe was really just the following:
  20. // - FlagLowercaseScheme
  21. // - FlagLowercaseHost
  22. // - FlagRemoveDefaultPort
  23. // - FlagRemoveDuplicateSlashes (and this was mixed in with the |)
  24. func NormalizeURL(u *url.URL) {
  25. lowercaseScheme(u)
  26. lowercaseHost(u)
  27. removeDefaultPort(u)
  28. removeDuplicateSlashes(u)
  29. }
  30. func lowercaseScheme(u *url.URL) {
  31. if len(u.Scheme) > 0 {
  32. u.Scheme = strings.ToLower(u.Scheme)
  33. }
  34. }
  35. func lowercaseHost(u *url.URL) {
  36. if len(u.Host) > 0 {
  37. u.Host = strings.ToLower(u.Host)
  38. }
  39. }
  40. func removeDefaultPort(u *url.URL) {
  41. if len(u.Host) > 0 {
  42. scheme := strings.ToLower(u.Scheme)
  43. u.Host = rxPort.ReplaceAllStringFunc(u.Host, func(val string) string {
  44. if (scheme == "http" && val == defaultHttpPort) || (scheme == "https" && val == defaultHttpsPort) {
  45. return ""
  46. }
  47. return val
  48. })
  49. }
  50. }
  51. func removeDuplicateSlashes(u *url.URL) {
  52. if len(u.Path) > 0 {
  53. u.Path = rxDupSlashes.ReplaceAllString(u.Path, "/")
  54. }
  55. }