transport.go 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. /*
  2. Copyright 2014 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package proxy
  14. import (
  15. "bytes"
  16. "compress/flate"
  17. "compress/gzip"
  18. "fmt"
  19. "io"
  20. "io/ioutil"
  21. "net/http"
  22. "net/url"
  23. "path"
  24. "strings"
  25. "golang.org/x/net/html"
  26. "golang.org/x/net/html/atom"
  27. "k8s.io/klog/v2"
  28. "k8s.io/apimachinery/pkg/api/errors"
  29. "k8s.io/apimachinery/pkg/util/net"
  30. "k8s.io/apimachinery/pkg/util/sets"
  31. )
  32. // atomsToAttrs states which attributes of which tags require URL substitution.
  33. // Sources: http://www.w3.org/TR/REC-html40/index/attributes.html
  34. //
  35. // http://www.w3.org/html/wg/drafts/html/master/index.html#attributes-1
  36. var atomsToAttrs = map[atom.Atom]sets.String{
  37. atom.A: sets.NewString("href"),
  38. atom.Applet: sets.NewString("codebase"),
  39. atom.Area: sets.NewString("href"),
  40. atom.Audio: sets.NewString("src"),
  41. atom.Base: sets.NewString("href"),
  42. atom.Blockquote: sets.NewString("cite"),
  43. atom.Body: sets.NewString("background"),
  44. atom.Button: sets.NewString("formaction"),
  45. atom.Command: sets.NewString("icon"),
  46. atom.Del: sets.NewString("cite"),
  47. atom.Embed: sets.NewString("src"),
  48. atom.Form: sets.NewString("action"),
  49. atom.Frame: sets.NewString("longdesc", "src"),
  50. atom.Head: sets.NewString("profile"),
  51. atom.Html: sets.NewString("manifest"),
  52. atom.Iframe: sets.NewString("longdesc", "src"),
  53. atom.Img: sets.NewString("longdesc", "src", "usemap"),
  54. atom.Input: sets.NewString("src", "usemap", "formaction"),
  55. atom.Ins: sets.NewString("cite"),
  56. atom.Link: sets.NewString("href"),
  57. atom.Object: sets.NewString("classid", "codebase", "data", "usemap"),
  58. atom.Q: sets.NewString("cite"),
  59. atom.Script: sets.NewString("src"),
  60. atom.Source: sets.NewString("src"),
  61. atom.Video: sets.NewString("poster", "src"),
  62. // TODO: css URLs hidden in style elements.
  63. }
  // Transport is an http.RoundTripper for text/html content: it forwards requests
  // to the embedded RoundTripper and replaces URLs in the returned HTML (and in
  // Location headers) so that they carry the prefix of the proxy server.
  type Transport struct {
  	// Scheme, when non-empty, is sent to the backend as X-Forwarded-Proto. It is
  	// also the scheme written into rewritten URLs.
  	Scheme string

  	// Host, when non-empty, is sent to the backend as X-Forwarded-Host. It is
  	// also the host written into rewritten URLs.
  	Host string

  	// PathPrepend is the path prefix joined in front of the request path for
  	// X-Forwarded-Uri and in front of rewritten absolute-path URLs.
  	PathPrepend string

  	// RoundTripper performs the actual request. When it is nil, RoundTrip falls
  	// back to http.DefaultTransport.
  	http.RoundTripper
  }
  72. // RoundTrip implements the http.RoundTripper interface
  73. func (t *Transport) RoundTrip(req *http.Request) (*http.Response, error) {
  74. // Add reverse proxy headers.
  75. forwardedURI := path.Join(t.PathPrepend, req.URL.EscapedPath())
  76. if strings.HasSuffix(req.URL.Path, "/") {
  77. forwardedURI = forwardedURI + "/"
  78. }
  79. req.Header.Set("X-Forwarded-Uri", forwardedURI)
  80. if len(t.Host) > 0 {
  81. req.Header.Set("X-Forwarded-Host", t.Host)
  82. }
  83. if len(t.Scheme) > 0 {
  84. req.Header.Set("X-Forwarded-Proto", t.Scheme)
  85. }
  86. rt := t.RoundTripper
  87. if rt == nil {
  88. rt = http.DefaultTransport
  89. }
  90. resp, err := rt.RoundTrip(req)
  91. if err != nil {
  92. return nil, errors.NewServiceUnavailable(fmt.Sprintf("error trying to reach service: %v", err))
  93. }
  94. if redirect := resp.Header.Get("Location"); redirect != "" {
  95. targetURL, err := url.Parse(redirect)
  96. if err != nil {
  97. return nil, errors.NewInternalError(fmt.Errorf("error trying to parse Location header: %v", err))
  98. }
  99. resp.Header.Set("Location", t.rewriteURL(targetURL, req.URL, req.Host))
  100. return resp, nil
  101. }
  102. cType := resp.Header.Get("Content-Type")
  103. cType = strings.TrimSpace(strings.SplitN(cType, ";", 2)[0])
  104. if cType != "text/html" {
  105. // Do nothing, simply pass through
  106. return resp, nil
  107. }
  108. return t.rewriteResponse(req, resp)
  109. }
  110. var _ = net.RoundTripperWrapper(&Transport{})
  111. func (rt *Transport) WrappedRoundTripper() http.RoundTripper {
  112. return rt.RoundTripper
  113. }
  114. // rewriteURL rewrites a single URL to go through the proxy, if the URL refers
  115. // to the same host as sourceURL, which is the page on which the target URL
  116. // occurred, or if the URL matches the sourceRequestHost.
  117. func (t *Transport) rewriteURL(url *url.URL, sourceURL *url.URL, sourceRequestHost string) string {
  118. // Example:
  119. // When API server processes a proxy request to a service (e.g. /api/v1/namespace/foo/service/bar/proxy/),
  120. // the sourceURL.Host (i.e. req.URL.Host) is the endpoint IP address of the service. The
  121. // sourceRequestHost (i.e. req.Host) is the Host header that specifies the host on which the
  122. // URL is sought, which can be different from sourceURL.Host. For example, if user sends the
  123. // request through "kubectl proxy" locally (i.e. localhost:8001/api/v1/namespace/foo/service/bar/proxy/),
  124. // sourceRequestHost is "localhost:8001".
  125. //
  126. // If the service's response URL contains non-empty host, and url.Host is equal to either sourceURL.Host
  127. // or sourceRequestHost, we should not consider the returned URL to be a completely different host.
  128. // It's the API server's responsibility to rewrite a same-host-and-absolute-path URL and append the
  129. // necessary URL prefix (i.e. /api/v1/namespace/foo/service/bar/proxy/).
  130. isDifferentHost := url.Host != "" && url.Host != sourceURL.Host && url.Host != sourceRequestHost
  131. isRelative := !strings.HasPrefix(url.Path, "/")
  132. if isDifferentHost || isRelative {
  133. return url.String()
  134. }
  135. // Do not rewrite scheme and host if the Transport has empty scheme and host
  136. // when targetURL already contains the sourceRequestHost
  137. if !(url.Host == sourceRequestHost && t.Scheme == "" && t.Host == "") {
  138. url.Scheme = t.Scheme
  139. url.Host = t.Host
  140. }
  141. origPath := url.Path
  142. // Do not rewrite URL if the sourceURL already contains the necessary prefix.
  143. if strings.HasPrefix(url.Path, t.PathPrepend) {
  144. return url.String()
  145. }
  146. url.Path = path.Join(t.PathPrepend, url.Path)
  147. if strings.HasSuffix(origPath, "/") {
  148. // Add back the trailing slash, which was stripped by path.Join().
  149. url.Path += "/"
  150. }
  151. return url.String()
  152. }
  153. // rewriteHTML scans the HTML for tags with url-valued attributes, and updates
  154. // those values with the urlRewriter function. The updated HTML is output to the
  155. // writer.
  156. func rewriteHTML(reader io.Reader, writer io.Writer, urlRewriter func(*url.URL) string) error {
  157. // Note: This assumes the content is UTF-8.
  158. tokenizer := html.NewTokenizer(reader)
  159. var err error
  160. for err == nil {
  161. tokenType := tokenizer.Next()
  162. switch tokenType {
  163. case html.ErrorToken:
  164. err = tokenizer.Err()
  165. case html.StartTagToken, html.SelfClosingTagToken:
  166. token := tokenizer.Token()
  167. if urlAttrs, ok := atomsToAttrs[token.DataAtom]; ok {
  168. for i, attr := range token.Attr {
  169. if urlAttrs.Has(attr.Key) {
  170. url, err := url.Parse(attr.Val)
  171. if err != nil {
  172. // Do not rewrite the URL if it isn't valid. It is intended not
  173. // to error here to prevent the inability to understand the
  174. // content of the body to cause a fatal error.
  175. continue
  176. }
  177. token.Attr[i].Val = urlRewriter(url)
  178. }
  179. }
  180. }
  181. _, err = writer.Write([]byte(token.String()))
  182. default:
  183. _, err = writer.Write(tokenizer.Raw())
  184. }
  185. }
  186. if err != io.EOF {
  187. return err
  188. }
  189. return nil
  190. }
  191. // rewriteResponse modifies an HTML response by updating absolute links referring
  192. // to the original host to instead refer to the proxy transport.
  193. func (t *Transport) rewriteResponse(req *http.Request, resp *http.Response) (*http.Response, error) {
  194. origBody := resp.Body
  195. defer origBody.Close()
  196. newContent := &bytes.Buffer{}
  197. var reader io.Reader = origBody
  198. var writer io.Writer = newContent
  199. encoding := resp.Header.Get("Content-Encoding")
  200. switch encoding {
  201. case "gzip":
  202. var err error
  203. reader, err = gzip.NewReader(reader)
  204. if err != nil {
  205. return nil, fmt.Errorf("errorf making gzip reader: %v", err)
  206. }
  207. gzw := gzip.NewWriter(writer)
  208. defer gzw.Close()
  209. writer = gzw
  210. case "deflate":
  211. var err error
  212. reader = flate.NewReader(reader)
  213. flw, err := flate.NewWriter(writer, flate.BestCompression)
  214. if err != nil {
  215. return nil, fmt.Errorf("errorf making flate writer: %v", err)
  216. }
  217. defer func() {
  218. flw.Close()
  219. flw.Flush()
  220. }()
  221. writer = flw
  222. case "":
  223. // This is fine
  224. default:
  225. // Some encoding we don't understand-- don't try to parse this
  226. klog.Errorf("Proxy encountered encoding %v for text/html; can't understand this so not fixing links.", encoding)
  227. return resp, nil
  228. }
  229. urlRewriter := func(targetUrl *url.URL) string {
  230. return t.rewriteURL(targetUrl, req.URL, req.Host)
  231. }
  232. err := rewriteHTML(reader, writer, urlRewriter)
  233. if err != nil {
  234. klog.Errorf("Failed to rewrite URLs: %v", err)
  235. return resp, err
  236. }
  237. resp.Body = ioutil.NopCloser(newContent)
  238. // Update header node with new content-length
  239. // TODO: Remove any hash/signature headers here?
  240. resp.Header.Del("Content-Length")
  241. resp.ContentLength = int64(newContent.Len())
  242. return resp, err
  243. }