reader.go 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. // Copyright 2016 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package storage
  15. import (
  16. "context"
  17. "fmt"
  18. "hash/crc32"
  19. "io"
  20. "io/ioutil"
  21. "net/http"
  22. "strings"
  23. "time"
  24. "cloud.google.com/go/internal/trace"
  25. )
  26. var crc32cTable = crc32.MakeTable(crc32.Castagnoli)
  27. // ReaderObjectAttrs are attributes about the object being read. These are populated
  28. // during the New call. This struct only holds a subset of object attributes: to
  29. // get the full set of attributes, use ObjectHandle.Attrs.
  30. //
  31. // Each field is read-only.
  32. type ReaderObjectAttrs struct {
  33. // Size is the length of the object's content.
  34. Size int64
  35. // StartOffset is the byte offset within the object
  36. // from which reading begins.
  37. // This value is only non-zero for range requests.
  38. StartOffset int64
  39. // ContentType is the MIME type of the object's content.
  40. ContentType string
  41. // ContentEncoding is the encoding of the object's content.
  42. ContentEncoding string
  43. // CacheControl specifies whether and for how long browser and Internet
  44. // caches are allowed to cache your objects.
  45. CacheControl string
  46. // LastModified is the time that the object was last modified.
  47. LastModified time.Time
  48. // Generation is the generation number of the object's content.
  49. Generation int64
  50. // Metageneration is the version of the metadata for this object at
  51. // this generation. This field is used for preconditions and for
  52. // detecting changes in metadata. A metageneration number is only
  53. // meaningful in the context of a particular generation of a
  54. // particular object.
  55. Metageneration int64
  56. }
  57. // NewReader creates a new Reader to read the contents of the
  58. // object.
  59. // ErrObjectNotExist will be returned if the object is not found.
  60. //
  61. // The caller must call Close on the returned Reader when done reading.
  62. func (o *ObjectHandle) NewReader(ctx context.Context) (*Reader, error) {
  63. return o.NewRangeReader(ctx, 0, -1)
  64. }
  65. // NewRangeReader reads part of an object, reading at most length bytes
  66. // starting at the given offset. If length is negative, the object is read
  67. // until the end. If offset is negative, the object is read abs(offset) bytes
  68. // from the end, and length must also be negative to indicate all remaining
  69. // bytes will be read.
  70. //
  71. // If the object's metadata property "Content-Encoding" is set to "gzip" or satisfies
  72. // decompressive transcoding per https://cloud.google.com/storage/docs/transcoding
  73. // that file will be served back whole, regardless of the requested range as
  74. // Google Cloud Storage dictates.
  75. func (o *ObjectHandle) NewRangeReader(ctx context.Context, offset, length int64) (r *Reader, err error) {
  76. // This span covers the life of the reader. It is closed via the context
  77. // in Reader.Close.
  78. ctx = trace.StartSpan(ctx, "cloud.google.com/go/storage.Object.Reader")
  79. if err := o.validate(); err != nil {
  80. return nil, err
  81. }
  82. if offset < 0 && length >= 0 {
  83. return nil, fmt.Errorf("storage: invalid offset %d < 0 requires negative length", offset)
  84. }
  85. if o.conds != nil {
  86. if err := o.conds.validate("NewRangeReader"); err != nil {
  87. return nil, err
  88. }
  89. }
  90. opts := makeStorageOpts(true, o.retry, o.userProject)
  91. params := &newRangeReaderParams{
  92. bucket: o.bucket,
  93. object: o.object,
  94. gen: o.gen,
  95. offset: offset,
  96. length: length,
  97. encryptionKey: o.encryptionKey,
  98. conds: o.conds,
  99. readCompressed: o.readCompressed,
  100. }
  101. r, err = o.c.tc.NewRangeReader(ctx, params, opts...)
  102. // Pass the context so that the span can be closed in Reader.Close, or close the
  103. // span now if there is an error.
  104. if err == nil {
  105. r.ctx = ctx
  106. } else {
  107. trace.EndSpan(ctx, err)
  108. }
  109. return r, err
  110. }
  111. // decompressiveTranscoding returns true if the request was served decompressed
  112. // and different than its original storage form. This happens when the "Content-Encoding"
  113. // header is "gzip".
  114. // See:
  115. // - https://cloud.google.com/storage/docs/transcoding#transcoding_and_gzip
  116. // - https://github.com/googleapis/google-cloud-go/issues/1800
  117. func decompressiveTranscoding(res *http.Response) bool {
  118. // Decompressive Transcoding.
  119. return res.Header.Get("Content-Encoding") == "gzip" ||
  120. res.Header.Get("X-Goog-Stored-Content-Encoding") == "gzip"
  121. }
  122. func uncompressedByServer(res *http.Response) bool {
  123. // If the data is stored as gzip but is not encoded as gzip, then it
  124. // was uncompressed by the server.
  125. return res.Header.Get("X-Goog-Stored-Content-Encoding") == "gzip" &&
  126. res.Header.Get("Content-Encoding") != "gzip"
  127. }
  128. // parseCRC32c parses the crc32c hash from the X-Goog-Hash header.
  129. // It can parse headers in the form [crc32c=xxx md5=xxx] (XML responses) or the
  130. // form [crc32c=xxx,md5=xxx] (JSON responses). The md5 hash is ignored.
  131. func parseCRC32c(res *http.Response) (uint32, bool) {
  132. const prefix = "crc32c="
  133. for _, spec := range res.Header["X-Goog-Hash"] {
  134. values := strings.Split(spec, ",")
  135. for _, v := range values {
  136. if strings.HasPrefix(v, prefix) {
  137. c, err := decodeUint32(v[len(prefix):])
  138. if err == nil {
  139. return c, true
  140. }
  141. }
  142. }
  143. }
  144. return 0, false
  145. }
  146. // setConditionsHeaders sets precondition request headers for downloads
  147. // using the XML API. It assumes that the conditions have been validated.
  148. func setConditionsHeaders(headers http.Header, conds *Conditions) error {
  149. if conds == nil {
  150. return nil
  151. }
  152. if conds.MetagenerationMatch != 0 {
  153. headers.Set("x-goog-if-metageneration-match", fmt.Sprint(conds.MetagenerationMatch))
  154. }
  155. switch {
  156. case conds.GenerationMatch != 0:
  157. headers.Set("x-goog-if-generation-match", fmt.Sprint(conds.GenerationMatch))
  158. case conds.DoesNotExist:
  159. headers.Set("x-goog-if-generation-match", "0")
  160. }
  161. return nil
  162. }
  163. var emptyBody = ioutil.NopCloser(strings.NewReader(""))
  164. // Reader reads a Cloud Storage object.
  165. // It implements io.Reader.
  166. //
  167. // Typically, a Reader computes the CRC of the downloaded content and compares it to
  168. // the stored CRC, returning an error from Read if there is a mismatch. This integrity check
  169. // is skipped if transcoding occurs. See https://cloud.google.com/storage/docs/transcoding.
  170. type Reader struct {
  171. Attrs ReaderObjectAttrs
  172. seen, remain, size int64
  173. checkCRC bool // should we check the CRC?
  174. wantCRC uint32 // the CRC32c value the server sent in the header
  175. gotCRC uint32 // running crc
  176. reader io.ReadCloser
  177. ctx context.Context
  178. }
  179. // Close closes the Reader. It must be called when done reading.
  180. func (r *Reader) Close() error {
  181. err := r.reader.Close()
  182. trace.EndSpan(r.ctx, err)
  183. return err
  184. }
  185. func (r *Reader) Read(p []byte) (int, error) {
  186. n, err := r.reader.Read(p)
  187. if r.remain != -1 {
  188. r.remain -= int64(n)
  189. }
  190. if r.checkCRC {
  191. r.gotCRC = crc32.Update(r.gotCRC, crc32cTable, p[:n])
  192. // Check CRC here. It would be natural to check it in Close, but
  193. // everybody defers Close on the assumption that it doesn't return
  194. // anything worth looking at.
  195. if err == io.EOF {
  196. if r.gotCRC != r.wantCRC {
  197. return n, fmt.Errorf("storage: bad CRC on read: got %d, want %d",
  198. r.gotCRC, r.wantCRC)
  199. }
  200. }
  201. }
  202. return n, err
  203. }
  204. // Size returns the size of the object in bytes.
  205. // The returned value is always the same and is not affected by
  206. // calls to Read or Close.
  207. //
  208. // Deprecated: use Reader.Attrs.Size.
  209. func (r *Reader) Size() int64 {
  210. return r.Attrs.Size
  211. }
  212. // Remain returns the number of bytes left to read, or -1 if unknown.
  213. func (r *Reader) Remain() int64 {
  214. return r.remain
  215. }
  216. // ContentType returns the content type of the object.
  217. //
  218. // Deprecated: use Reader.Attrs.ContentType.
  219. func (r *Reader) ContentType() string {
  220. return r.Attrs.ContentType
  221. }
  222. // ContentEncoding returns the content encoding of the object.
  223. //
  224. // Deprecated: use Reader.Attrs.ContentEncoding.
  225. func (r *Reader) ContentEncoding() string {
  226. return r.Attrs.ContentEncoding
  227. }
  228. // CacheControl returns the cache control of the object.
  229. //
  230. // Deprecated: use Reader.Attrs.CacheControl.
  231. func (r *Reader) CacheControl() string {
  232. return r.Attrs.CacheControl
  233. }
  234. // LastModified returns the value of the Last-Modified header.
  235. //
  236. // Deprecated: use Reader.Attrs.LastModified.
  237. func (r *Reader) LastModified() (time.Time, error) {
  238. return r.Attrs.LastModified, nil
  239. }