tar.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. //go:build windows
  2. // +build windows
  3. package backuptar
  4. import (
  5. "archive/tar"
  6. "encoding/base64"
  7. "fmt"
  8. "io"
  9. "path/filepath"
  10. "strconv"
  11. "strings"
  12. "time"
  13. "github.com/Microsoft/go-winio"
  14. "golang.org/x/sys/windows"
  15. )
  16. //nolint:deadcode,varcheck // keep unused constants for potential future use
  17. const (
  18. cISUID = 0004000 // Set uid
  19. cISGID = 0002000 // Set gid
  20. cISVTX = 0001000 // Save text (sticky bit)
  21. cISDIR = 0040000 // Directory
  22. cISFIFO = 0010000 // FIFO
  23. cISREG = 0100000 // Regular file
  24. cISLNK = 0120000 // Symbolic link
  25. cISBLK = 0060000 // Block special file
  26. cISCHR = 0020000 // Character special file
  27. cISSOCK = 0140000 // Socket
  28. )
  29. const (
  30. hdrFileAttributes = "MSWINDOWS.fileattr"
  31. hdrSecurityDescriptor = "MSWINDOWS.sd"
  32. hdrRawSecurityDescriptor = "MSWINDOWS.rawsd"
  33. hdrMountPoint = "MSWINDOWS.mountpoint"
  34. hdrEaPrefix = "MSWINDOWS.xattr."
  35. hdrCreationTime = "LIBARCHIVE.creationtime"
  36. )
  37. // zeroReader is an io.Reader that always returns 0s.
  38. type zeroReader struct{}
  39. func (zeroReader) Read(b []byte) (int, error) {
  40. for i := range b {
  41. b[i] = 0
  42. }
  43. return len(b), nil
  44. }
  45. func copySparse(t *tar.Writer, br *winio.BackupStreamReader) error {
  46. curOffset := int64(0)
  47. for {
  48. bhdr, err := br.Next()
  49. if err == io.EOF { //nolint:errorlint
  50. err = io.ErrUnexpectedEOF
  51. }
  52. if err != nil {
  53. return err
  54. }
  55. if bhdr.Id != winio.BackupSparseBlock {
  56. return fmt.Errorf("unexpected stream %d", bhdr.Id)
  57. }
  58. // We can't seek backwards, since we have already written that data to the tar.Writer.
  59. if bhdr.Offset < curOffset {
  60. return fmt.Errorf("cannot seek back from %d to %d", curOffset, bhdr.Offset)
  61. }
  62. // archive/tar does not support writing sparse files
  63. // so just write zeroes to catch up to the current offset.
  64. if _, err = io.CopyN(t, zeroReader{}, bhdr.Offset-curOffset); err != nil {
  65. return fmt.Errorf("seek to offset %d: %w", bhdr.Offset, err)
  66. }
  67. if bhdr.Size == 0 {
  68. // A sparse block with size = 0 is used to mark the end of the sparse blocks.
  69. break
  70. }
  71. n, err := io.Copy(t, br)
  72. if err != nil {
  73. return err
  74. }
  75. if n != bhdr.Size {
  76. return fmt.Errorf("copied %d bytes instead of %d at offset %d", n, bhdr.Size, bhdr.Offset)
  77. }
  78. curOffset = bhdr.Offset + n
  79. }
  80. return nil
  81. }
  82. // BasicInfoHeader creates a tar header from basic file information.
  83. func BasicInfoHeader(name string, size int64, fileInfo *winio.FileBasicInfo) *tar.Header {
  84. hdr := &tar.Header{
  85. Format: tar.FormatPAX,
  86. Name: filepath.ToSlash(name),
  87. Size: size,
  88. Typeflag: tar.TypeReg,
  89. ModTime: time.Unix(0, fileInfo.LastWriteTime.Nanoseconds()),
  90. ChangeTime: time.Unix(0, fileInfo.ChangeTime.Nanoseconds()),
  91. AccessTime: time.Unix(0, fileInfo.LastAccessTime.Nanoseconds()),
  92. PAXRecords: make(map[string]string),
  93. }
  94. hdr.PAXRecords[hdrFileAttributes] = fmt.Sprintf("%d", fileInfo.FileAttributes)
  95. hdr.PAXRecords[hdrCreationTime] = formatPAXTime(time.Unix(0, fileInfo.CreationTime.Nanoseconds()))
  96. if (fileInfo.FileAttributes & windows.FILE_ATTRIBUTE_DIRECTORY) != 0 {
  97. hdr.Mode |= cISDIR
  98. hdr.Size = 0
  99. hdr.Typeflag = tar.TypeDir
  100. }
  101. return hdr
  102. }
  103. // SecurityDescriptorFromTarHeader reads the SDDL associated with the header of the current file
  104. // from the tar header and returns the security descriptor into a byte slice.
  105. func SecurityDescriptorFromTarHeader(hdr *tar.Header) ([]byte, error) {
  106. if sdraw, ok := hdr.PAXRecords[hdrRawSecurityDescriptor]; ok {
  107. sd, err := base64.StdEncoding.DecodeString(sdraw)
  108. if err != nil {
  109. // Not returning sd as-is in the error-case, as base64.DecodeString
  110. // may return partially decoded data (not nil or empty slice) in case
  111. // of a failure: https://github.com/golang/go/blob/go1.17.7/src/encoding/base64/base64.go#L382-L387
  112. return nil, err
  113. }
  114. return sd, nil
  115. }
  116. // Maintaining old SDDL-based behavior for backward compatibility. All new
  117. // tar headers written by this library will have raw binary for the security
  118. // descriptor.
  119. if sddl, ok := hdr.PAXRecords[hdrSecurityDescriptor]; ok {
  120. return winio.SddlToSecurityDescriptor(sddl)
  121. }
  122. return nil, nil
  123. }
  124. // ExtendedAttributesFromTarHeader reads the EAs associated with the header of the
  125. // current file from the tar header and returns it as a byte slice.
  126. func ExtendedAttributesFromTarHeader(hdr *tar.Header) ([]byte, error) {
  127. var eas []winio.ExtendedAttribute //nolint:prealloc // len(eas) <= len(hdr.PAXRecords); prealloc is wasteful
  128. for k, v := range hdr.PAXRecords {
  129. if !strings.HasPrefix(k, hdrEaPrefix) {
  130. continue
  131. }
  132. data, err := base64.StdEncoding.DecodeString(v)
  133. if err != nil {
  134. return nil, err
  135. }
  136. eas = append(eas, winio.ExtendedAttribute{
  137. Name: k[len(hdrEaPrefix):],
  138. Value: data,
  139. })
  140. }
  141. var eaData []byte
  142. var err error
  143. if len(eas) != 0 {
  144. eaData, err = winio.EncodeExtendedAttributes(eas)
  145. if err != nil {
  146. return nil, err
  147. }
  148. }
  149. return eaData, nil
  150. }
  151. // EncodeReparsePointFromTarHeader reads the ReparsePoint structure from the tar header
  152. // and encodes it into a byte slice. The file for which this function is called must be a
  153. // symlink.
  154. func EncodeReparsePointFromTarHeader(hdr *tar.Header) []byte {
  155. _, isMountPoint := hdr.PAXRecords[hdrMountPoint]
  156. rp := winio.ReparsePoint{
  157. Target: filepath.FromSlash(hdr.Linkname),
  158. IsMountPoint: isMountPoint,
  159. }
  160. return winio.EncodeReparsePoint(&rp)
  161. }
  162. // WriteTarFileFromBackupStream writes a file to a tar writer using data from a Win32 backup stream.
  163. //
  164. // This encodes Win32 metadata as tar pax vendor extensions starting with MSWINDOWS.
  165. //
  166. // The additional Win32 metadata is:
  167. //
  168. // - MSWINDOWS.fileattr: The Win32 file attributes, as a decimal value
  169. // - MSWINDOWS.rawsd: The Win32 security descriptor, in raw binary format
  170. // - MSWINDOWS.mountpoint: If present, this is a mount point and not a symlink, even though the type is '2' (symlink)
  171. func WriteTarFileFromBackupStream(t *tar.Writer, r io.Reader, name string, size int64, fileInfo *winio.FileBasicInfo) error {
  172. name = filepath.ToSlash(name)
  173. hdr := BasicInfoHeader(name, size, fileInfo)
  174. // If r can be seeked, then this function is two-pass: pass 1 collects the
  175. // tar header data, and pass 2 copies the data stream. If r cannot be
  176. // seeked, then some header data (in particular EAs) will be silently lost.
  177. var (
  178. restartPos int64
  179. err error
  180. )
  181. sr, readTwice := r.(io.Seeker)
  182. if readTwice {
  183. if restartPos, err = sr.Seek(0, io.SeekCurrent); err != nil {
  184. readTwice = false
  185. }
  186. }
  187. br := winio.NewBackupStreamReader(r)
  188. var dataHdr *winio.BackupHeader
  189. for dataHdr == nil {
  190. bhdr, err := br.Next()
  191. if err == io.EOF { //nolint:errorlint
  192. break
  193. }
  194. if err != nil {
  195. return err
  196. }
  197. switch bhdr.Id {
  198. case winio.BackupData:
  199. hdr.Mode |= cISREG
  200. if !readTwice {
  201. dataHdr = bhdr
  202. }
  203. case winio.BackupSecurity:
  204. sd, err := io.ReadAll(br)
  205. if err != nil {
  206. return err
  207. }
  208. hdr.PAXRecords[hdrRawSecurityDescriptor] = base64.StdEncoding.EncodeToString(sd)
  209. case winio.BackupReparseData:
  210. hdr.Mode |= cISLNK
  211. hdr.Typeflag = tar.TypeSymlink
  212. reparseBuffer, _ := io.ReadAll(br)
  213. rp, err := winio.DecodeReparsePoint(reparseBuffer)
  214. if err != nil {
  215. return err
  216. }
  217. if rp.IsMountPoint {
  218. hdr.PAXRecords[hdrMountPoint] = "1"
  219. }
  220. hdr.Linkname = rp.Target
  221. case winio.BackupEaData:
  222. eab, err := io.ReadAll(br)
  223. if err != nil {
  224. return err
  225. }
  226. eas, err := winio.DecodeExtendedAttributes(eab)
  227. if err != nil {
  228. return err
  229. }
  230. for _, ea := range eas {
  231. // Use base64 encoding for the binary value. Note that there
  232. // is no way to encode the EA's flags, since their use doesn't
  233. // make any sense for persisted EAs.
  234. hdr.PAXRecords[hdrEaPrefix+ea.Name] = base64.StdEncoding.EncodeToString(ea.Value)
  235. }
  236. case winio.BackupAlternateData, winio.BackupLink, winio.BackupPropertyData, winio.BackupObjectId, winio.BackupTxfsData:
  237. // ignore these streams
  238. default:
  239. return fmt.Errorf("%s: unknown stream ID %d", name, bhdr.Id)
  240. }
  241. }
  242. err = t.WriteHeader(hdr)
  243. if err != nil {
  244. return err
  245. }
  246. if readTwice {
  247. // Get back to the data stream.
  248. if _, err = sr.Seek(restartPos, io.SeekStart); err != nil {
  249. return err
  250. }
  251. for dataHdr == nil {
  252. bhdr, err := br.Next()
  253. if err == io.EOF { //nolint:errorlint
  254. break
  255. }
  256. if err != nil {
  257. return err
  258. }
  259. if bhdr.Id == winio.BackupData {
  260. dataHdr = bhdr
  261. }
  262. }
  263. }
  264. // The logic for copying file contents is fairly complicated due to the need for handling sparse files,
  265. // and the weird ways they are represented by BackupRead. A normal file will always either have a data stream
  266. // with size and content, or no data stream at all (if empty). However, for a sparse file, the content can also
  267. // be represented using a series of sparse block streams following the data stream. Additionally, the way sparse
  268. // files are handled by BackupRead has changed in the OS recently. The specifics of the representation are described
  269. // in the list at the bottom of this block comment.
  270. //
  271. // Sparse files can be represented in four different ways, based on the specifics of the file.
  272. // - Size = 0:
  273. // Previously: BackupRead yields no data stream and no sparse block streams.
  274. // Recently: BackupRead yields a data stream with size = 0. There are no following sparse block streams.
  275. // - Size > 0, no allocated ranges:
  276. // BackupRead yields a data stream with size = 0. Following is a single sparse block stream with
  277. // size = 0 and offset = <file size>.
  278. // - Size > 0, one allocated range:
  279. // BackupRead yields a data stream with size = <file size> containing the file contents. There are no
  280. // sparse block streams. This is the case if you take a normal file with contents and simply set the
  281. // sparse flag on it.
  282. // - Size > 0, multiple allocated ranges:
  283. // BackupRead yields a data stream with size = 0. Following are sparse block streams for each allocated
  284. // range of the file containing the range contents. Finally there is a sparse block stream with
  285. // size = 0 and offset = <file size>.
  286. if dataHdr != nil { //nolint:nestif // todo: reduce nesting complexity
  287. // A data stream was found. Copy the data.
  288. // We assume that we will either have a data stream size > 0 XOR have sparse block streams.
  289. if dataHdr.Size > 0 || (dataHdr.Attributes&winio.StreamSparseAttributes) == 0 {
  290. if size != dataHdr.Size {
  291. return fmt.Errorf("%s: mismatch between file size %d and header size %d", name, size, dataHdr.Size)
  292. }
  293. if _, err = io.Copy(t, br); err != nil {
  294. return fmt.Errorf("%s: copying contents from data stream: %w", name, err)
  295. }
  296. } else if size > 0 {
  297. // As of a recent OS change, BackupRead now returns a data stream for empty sparse files.
  298. // These files have no sparse block streams, so skip the copySparse call if file size = 0.
  299. if err = copySparse(t, br); err != nil {
  300. return fmt.Errorf("%s: copying contents from sparse block stream: %w", name, err)
  301. }
  302. }
  303. }
  304. // Look for streams after the data stream. The only ones we handle are alternate data streams.
  305. // Other streams may have metadata that could be serialized, but the tar header has already
  306. // been written. In practice, this means that we don't get EA or TXF metadata.
  307. for {
  308. bhdr, err := br.Next()
  309. if err == io.EOF { //nolint:errorlint
  310. break
  311. }
  312. if err != nil {
  313. return err
  314. }
  315. switch bhdr.Id {
  316. case winio.BackupAlternateData:
  317. if (bhdr.Attributes & winio.StreamSparseAttributes) != 0 {
  318. // Unsupported for now, since the size of the alternate stream is not present
  319. // in the backup stream until after the data has been read.
  320. return fmt.Errorf("%s: tar of sparse alternate data streams is unsupported", name)
  321. }
  322. altName := strings.TrimSuffix(bhdr.Name, ":$DATA")
  323. hdr = &tar.Header{
  324. Format: hdr.Format,
  325. Name: name + altName,
  326. Mode: hdr.Mode,
  327. Typeflag: tar.TypeReg,
  328. Size: bhdr.Size,
  329. ModTime: hdr.ModTime,
  330. AccessTime: hdr.AccessTime,
  331. ChangeTime: hdr.ChangeTime,
  332. }
  333. err = t.WriteHeader(hdr)
  334. if err != nil {
  335. return err
  336. }
  337. _, err = io.Copy(t, br)
  338. if err != nil {
  339. return err
  340. }
  341. case winio.BackupEaData, winio.BackupLink, winio.BackupPropertyData, winio.BackupObjectId, winio.BackupTxfsData:
  342. // ignore these streams
  343. default:
  344. return fmt.Errorf("%s: unknown stream ID %d after data", name, bhdr.Id)
  345. }
  346. }
  347. return nil
  348. }
  349. // FileInfoFromHeader retrieves basic Win32 file information from a tar header, using the additional metadata written by
  350. // WriteTarFileFromBackupStream.
  351. func FileInfoFromHeader(hdr *tar.Header) (name string, size int64, fileInfo *winio.FileBasicInfo, err error) {
  352. name = hdr.Name
  353. if hdr.Typeflag == tar.TypeReg {
  354. size = hdr.Size
  355. }
  356. fileInfo = &winio.FileBasicInfo{
  357. LastAccessTime: windows.NsecToFiletime(hdr.AccessTime.UnixNano()),
  358. LastWriteTime: windows.NsecToFiletime(hdr.ModTime.UnixNano()),
  359. ChangeTime: windows.NsecToFiletime(hdr.ChangeTime.UnixNano()),
  360. // Default to ModTime, we'll pull hdrCreationTime below if present
  361. CreationTime: windows.NsecToFiletime(hdr.ModTime.UnixNano()),
  362. }
  363. if attrStr, ok := hdr.PAXRecords[hdrFileAttributes]; ok {
  364. attr, err := strconv.ParseUint(attrStr, 10, 32)
  365. if err != nil {
  366. return "", 0, nil, err
  367. }
  368. fileInfo.FileAttributes = uint32(attr)
  369. } else {
  370. if hdr.Typeflag == tar.TypeDir {
  371. fileInfo.FileAttributes |= windows.FILE_ATTRIBUTE_DIRECTORY
  372. }
  373. }
  374. if creationTimeStr, ok := hdr.PAXRecords[hdrCreationTime]; ok {
  375. creationTime, err := parsePAXTime(creationTimeStr)
  376. if err != nil {
  377. return "", 0, nil, err
  378. }
  379. fileInfo.CreationTime = windows.NsecToFiletime(creationTime.UnixNano())
  380. }
  381. return name, size, fileInfo, err
  382. }
  383. // WriteBackupStreamFromTarFile writes a Win32 backup stream from the current tar file. Since this function may process multiple
  384. // tar file entries in order to collect all the alternate data streams for the file, it returns the next
  385. // tar file that was not processed, or io.EOF is there are no more.
  386. func WriteBackupStreamFromTarFile(w io.Writer, t *tar.Reader, hdr *tar.Header) (*tar.Header, error) {
  387. bw := winio.NewBackupStreamWriter(w)
  388. sd, err := SecurityDescriptorFromTarHeader(hdr)
  389. if err != nil {
  390. return nil, err
  391. }
  392. if len(sd) != 0 {
  393. bhdr := winio.BackupHeader{
  394. Id: winio.BackupSecurity,
  395. Size: int64(len(sd)),
  396. }
  397. err := bw.WriteHeader(&bhdr)
  398. if err != nil {
  399. return nil, err
  400. }
  401. _, err = bw.Write(sd)
  402. if err != nil {
  403. return nil, err
  404. }
  405. }
  406. eadata, err := ExtendedAttributesFromTarHeader(hdr)
  407. if err != nil {
  408. return nil, err
  409. }
  410. if len(eadata) != 0 {
  411. bhdr := winio.BackupHeader{
  412. Id: winio.BackupEaData,
  413. Size: int64(len(eadata)),
  414. }
  415. err = bw.WriteHeader(&bhdr)
  416. if err != nil {
  417. return nil, err
  418. }
  419. _, err = bw.Write(eadata)
  420. if err != nil {
  421. return nil, err
  422. }
  423. }
  424. if hdr.Typeflag == tar.TypeSymlink {
  425. reparse := EncodeReparsePointFromTarHeader(hdr)
  426. bhdr := winio.BackupHeader{
  427. Id: winio.BackupReparseData,
  428. Size: int64(len(reparse)),
  429. }
  430. err := bw.WriteHeader(&bhdr)
  431. if err != nil {
  432. return nil, err
  433. }
  434. _, err = bw.Write(reparse)
  435. if err != nil {
  436. return nil, err
  437. }
  438. }
  439. if hdr.Typeflag == tar.TypeReg {
  440. bhdr := winio.BackupHeader{
  441. Id: winio.BackupData,
  442. Size: hdr.Size,
  443. }
  444. err := bw.WriteHeader(&bhdr)
  445. if err != nil {
  446. return nil, err
  447. }
  448. _, err = io.Copy(bw, t)
  449. if err != nil {
  450. return nil, err
  451. }
  452. }
  453. // Copy all the alternate data streams and return the next non-ADS header.
  454. for {
  455. ahdr, err := t.Next()
  456. if err != nil {
  457. return nil, err
  458. }
  459. if ahdr.Typeflag != tar.TypeReg || !strings.HasPrefix(ahdr.Name, hdr.Name+":") {
  460. return ahdr, nil
  461. }
  462. bhdr := winio.BackupHeader{
  463. Id: winio.BackupAlternateData,
  464. Size: ahdr.Size,
  465. Name: ahdr.Name[len(hdr.Name):] + ":$DATA",
  466. }
  467. err = bw.WriteHeader(&bhdr)
  468. if err != nil {
  469. return nil, err
  470. }
  471. _, err = io.Copy(bw, t)
  472. if err != nil {
  473. return nil, err
  474. }
  475. }
  476. }