fasthash.go 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. // Copyright 2019 Yunion
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package fileutils2
  15. import (
  16. "crypto/md5"
  17. "crypto/sha1"
  18. "crypto/sha256"
  19. "crypto/sha512"
  20. "fmt"
  21. "hash"
  22. "io"
  23. "os"
  24. "yunion.io/x/log"
  25. )
  // Block geometry used when sampling a file for hashing.
  const (
  	// BLOCK_WIDTH is the base-2 logarithm of BLOCK_SIZE; shifting a byte
  	// count right by it yields a count of whole blocks.
  	BLOCK_WIDTH = 15
  	// BLOCK_SIZE is the number of bytes in one sampled block (32 KiB).
  	BLOCK_SIZE = 1 << BLOCK_WIDTH
  )
  // SumHashes folds several digests into one by adding them byte-wise.
  //
  // The result is as long as the shortest digest in sums; longer digests are
  // truncated to that length. Each output byte is the sum of the bytes at the
  // same index across all digests, wrapping modulo 256. With no digests, or
  // when any digest is empty, the result is an empty (non-nil) slice.
  func SumHashes(sums [][]byte) []byte {
  	if len(sums) == 0 {
  		return []byte{}
  	}
  	// Seed the minimum from the first digest rather than from a zero
  	// sentinel: a zero sentinel cannot be told apart from a genuinely empty
  	// digest, which would let a later, longer digest raise the length again
  	// and cause an out-of-range index below.
  	minLen := len(sums[0])
  	for _, s := range sums[1:] {
  		if len(s) < minLen {
  			minLen = len(s)
  		}
  	}
  	ret := make([]byte, minLen)
  	for _, s := range sums {
  		for j := range ret {
  			ret[j] += s[j]
  		}
  	}
  	return ret
  }
  45. func FileFastHash(filename string, hashAlgo []hash.Hash, rate int) ([][]byte, error) {
  46. size := FileSize(filename)
  47. blockCount := size >> BLOCK_WIDTH
  48. samples := int(blockCount / int64(rate))
  49. // log.Infof("block_count: %d samples: %d", blockCount, samples)
  50. if samples == 0 {
  51. return FileHash(filename, hashAlgo)
  52. }
  53. fp, err := os.Open(filename)
  54. if err != nil {
  55. log.Errorf("open file for hash fail %s", err)
  56. return nil, err
  57. }
  58. defer fp.Close()
  59. buf := make([]byte, BLOCK_SIZE)
  60. offset := int64(0)
  61. for i := 0; i < samples; i += 1 {
  62. // log.Infof("%dth offset %d %d", i, offset, size)
  63. _, err := fp.Seek(offset, io.SeekStart)
  64. if err != nil {
  65. log.Errorf("seek error %s", err)
  66. return nil, err
  67. }
  68. n, err := fp.Read(buf)
  69. if err != nil {
  70. log.Errorf("read error %s", err)
  71. return nil, err
  72. }
  73. if n != BLOCK_SIZE {
  74. return nil, fmt.Errorf("fail to read all???")
  75. }
  76. for i := 0; i < len(hashAlgo); i += 1 {
  77. hashAlgo[i].Write(buf)
  78. }
  79. offset += (int64(rate) << BLOCK_WIDTH)
  80. }
  81. sums := make([][]byte, len(hashAlgo))
  82. for i := 0; i < len(hashAlgo); i += 1 {
  83. sums[i] = hashAlgo[i].Sum(nil)
  84. }
  85. return sums, nil
  86. }
  87. func FastCheckSum(filePath string) (string, error) {
  88. hashes := []hash.Hash{md5.New(), sha1.New(), sha256.New(), sha512.New()}
  89. results, err := FileFastHash(filePath, hashes, 128)
  90. if err != nil {
  91. return "", err
  92. }
  93. sum := SumHashes(results)
  94. return fmt.Sprintf("%x", sum), nil
  95. }