// multihash.go

// Package multihash is the Go implementation of
// https://github.com/multiformats/multihash, or self-describing
// hashes.
  4. package multihash
  5. import (
  6. "encoding/hex"
  7. "errors"
  8. "fmt"
  9. "math"
  10. b58 "github.com/mr-tron/base58/base58"
  11. "github.com/multiformats/go-varint"
  12. )
  // Sentinel errors exposed by this package. The message texts are observable
  // by callers and are therefore kept verbatim.
  var (
  	// ErrUnknownCode signals an unrecognised hash function code. No code
  	// visible in this file returns it; presumably kept for callers.
  	ErrUnknownCode = errors.New("unknown multihash code")

  	// ErrTooShort is returned by readMultihashFromBuf when the input holds
  	// fewer than two bytes.
  	ErrTooShort = errors.New("multihash too short. must be >= 2 bytes")

  	// ErrTooLong and ErrLenNotSupported are not returned by any code visible
  	// in this file; presumably retained for API compatibility.
  	ErrTooLong         = errors.New("multihash too long. must be < 129 bytes")
  	ErrLenNotSupported = errors.New("multihash does not yet support digests longer than 127 bytes")

  	// ErrInvalidMultihash is returned by FromB58String when the input is not
  	// valid base58.
  	ErrInvalidMultihash = errors.New("input isn't valid multihash")

  	// ErrVarintBufferShort and ErrVarintTooLong are returned by uvarint when
  	// the varint decoder reports zero or a negative number of bytes consumed.
  	ErrVarintBufferShort = errors.New("uvarint: buffer too small")
  	ErrVarintTooLong     = errors.New("uvarint: varint too big (max 64bit)")
  )
  23. // ErrInconsistentLen is returned when a decoded multihash has an inconsistent length
  24. type ErrInconsistentLen struct {
  25. dm DecodedMultihash
  26. lengthFound int
  27. }
  28. func (e ErrInconsistentLen) Error() string {
  29. return fmt.Sprintf("multihash length inconsistent: expected %d; got %d", e.dm.Length, e.lengthFound)
  30. }
  // Hash function codes understood by this package. The identifiers are part
  // of the public API, so their (non-MixedCaps) spelling is kept.
  const (
  	IDENTITY = 0x00
  	// Deprecated: use IDENTITY
  	ID = IDENTITY

  	SHA1     = 0x11
  	SHA2_256 = 0x12
  	SHA2_512 = 0x13

  	// Note that the SHA3 codes descend as the digest size grows.
  	SHA3_224 = 0x17
  	SHA3_256 = 0x16
  	SHA3_384 = 0x15
  	SHA3_512 = 0x14
  	// SHA3 is an alias for SHA3_512.
  	SHA3 = SHA3_512

  	KECCAK_224 = 0x1A
  	KECCAK_256 = 0x1B
  	KECCAK_384 = 0x1C
  	KECCAK_512 = 0x1D

  	BLAKE3 = 0x1E

  	SHAKE_128 = 0x18
  	SHAKE_256 = 0x19

  	// The BLAKE2 families occupy contiguous code ranges, one code per
  	// digest size; init registers a name for every code in each range.
  	BLAKE2B_MIN = 0xb201
  	BLAKE2B_MAX = 0xb240
  	BLAKE2S_MIN = 0xb241
  	BLAKE2S_MAX = 0xb260

  	MD5 = 0xd5

  	DBL_SHA2_256 = 0x56

  	MURMUR3X64_64 = 0x22
  	// Deprecated: use MURMUR3X64_64
  	MURMUR3 = MURMUR3X64_64

  	SHA2_256_TRUNC254_PADDED  = 0x1012
  	X11                       = 0x1100
  	POSEIDON_BLS12_381_A1_FC1 = 0xb401
  )
  64. func init() {
  65. // Add blake2b (64 codes)
  66. for c := uint64(BLAKE2B_MIN); c <= BLAKE2B_MAX; c++ {
  67. n := c - BLAKE2B_MIN + 1
  68. name := fmt.Sprintf("blake2b-%d", n*8)
  69. Names[name] = c
  70. Codes[c] = name
  71. }
  72. // Add blake2s (32 codes)
  73. for c := uint64(BLAKE2S_MIN); c <= BLAKE2S_MAX; c++ {
  74. n := c - BLAKE2S_MIN + 1
  75. name := fmt.Sprintf("blake2s-%d", n*8)
  76. Names[name] = c
  77. Codes[c] = name
  78. }
  79. }
  80. // Names maps the name of a hash to the code
  81. var Names = map[string]uint64{
  82. "identity": IDENTITY,
  83. "sha1": SHA1,
  84. "sha2-256": SHA2_256,
  85. "sha2-512": SHA2_512,
  86. "sha3": SHA3_512,
  87. "sha3-224": SHA3_224,
  88. "sha3-256": SHA3_256,
  89. "sha3-384": SHA3_384,
  90. "sha3-512": SHA3_512,
  91. "dbl-sha2-256": DBL_SHA2_256,
  92. "murmur3-x64-64": MURMUR3X64_64,
  93. "keccak-224": KECCAK_224,
  94. "keccak-256": KECCAK_256,
  95. "keccak-384": KECCAK_384,
  96. "keccak-512": KECCAK_512,
  97. "blake3": BLAKE3,
  98. "shake-128": SHAKE_128,
  99. "shake-256": SHAKE_256,
  100. "sha2-256-trunc254-padded": SHA2_256_TRUNC254_PADDED,
  101. "x11": X11,
  102. "md5": MD5,
  103. "poseidon-bls12_381-a2-fc1": POSEIDON_BLS12_381_A1_FC1,
  104. }
  105. // Codes maps a hash code to it's name
  106. var Codes = map[uint64]string{
  107. IDENTITY: "identity",
  108. SHA1: "sha1",
  109. SHA2_256: "sha2-256",
  110. SHA2_512: "sha2-512",
  111. SHA3_224: "sha3-224",
  112. SHA3_256: "sha3-256",
  113. SHA3_384: "sha3-384",
  114. SHA3_512: "sha3-512",
  115. DBL_SHA2_256: "dbl-sha2-256",
  116. MURMUR3X64_64: "murmur3-x64-64",
  117. KECCAK_224: "keccak-224",
  118. KECCAK_256: "keccak-256",
  119. KECCAK_384: "keccak-384",
  120. KECCAK_512: "keccak-512",
  121. BLAKE3: "blake3",
  122. SHAKE_128: "shake-128",
  123. SHAKE_256: "shake-256",
  124. SHA2_256_TRUNC254_PADDED: "sha2-256-trunc254-padded",
  125. X11: "x11",
  126. POSEIDON_BLS12_381_A1_FC1: "poseidon-bls12_381-a2-fc1",
  127. MD5: "md5",
  128. }
  129. // reads a varint from buf and returns bytes read.
  130. func uvarint(buf []byte) (uint64, []byte, error) {
  131. n, c, err := varint.FromUvarint(buf)
  132. if err != nil {
  133. return n, buf, err
  134. }
  135. if c == 0 {
  136. return n, buf, ErrVarintBufferShort
  137. } else if c < 0 {
  138. return n, buf[-c:], ErrVarintTooLong
  139. } else {
  140. return n, buf[c:], nil
  141. }
  142. }
  // DecodedMultihash represents a parsed multihash and allows
  // easy access to the different parts of a multihash.
  type DecodedMultihash struct {
  	// Code is the hash function code read from the header.
  	Code uint64
  	// Name is the entry of Codes for Code; it is the empty string when the
  	// code is not registered.
  	Name string
  	// Length is the digest length. It is a plain int because that is the
  	// type returned by len().
  	Length int
  	// Digest holds the raw digest bytes only, without the code/length
  	// header. When produced by Decode it is a sub-slice of the input buffer,
  	// not a copy.
  	Digest []byte
  }
  // Multihash is a byte slice with the following form:
  //
  //	<hash function code><digest size><hash function output>
  //
  // The code and the digest size are both encoded as unsigned varints.
  // See the spec for more information.
  type Multihash []byte
  155. // HexString returns the hex-encoded representation of a multihash.
  156. func (m Multihash) HexString() string {
  157. return hex.EncodeToString([]byte(m))
  158. }
  159. // String is an alias to HexString().
  160. func (m Multihash) String() string {
  161. return m.HexString()
  162. }
  163. // FromHexString parses a hex-encoded multihash.
  164. func FromHexString(s string) (Multihash, error) {
  165. b, err := hex.DecodeString(s)
  166. if err != nil {
  167. return Multihash{}, err
  168. }
  169. return Cast(b)
  170. }
  171. // B58String returns the B58-encoded representation of a multihash.
  172. func (m Multihash) B58String() string {
  173. return b58.Encode([]byte(m))
  174. }
  175. // FromB58String parses a B58-encoded multihash.
  176. func FromB58String(s string) (m Multihash, err error) {
  177. b, err := b58.Decode(s)
  178. if err != nil {
  179. return Multihash{}, ErrInvalidMultihash
  180. }
  181. return Cast(b)
  182. }
  183. // Cast casts a buffer onto a multihash, and returns an error
  184. // if it does not work.
  185. func Cast(buf []byte) (Multihash, error) {
  186. _, err := Decode(buf)
  187. if err != nil {
  188. return Multihash{}, err
  189. }
  190. return Multihash(buf), nil
  191. }
  192. // Decode parses multihash bytes into a DecodedMultihash.
  193. func Decode(buf []byte) (*DecodedMultihash, error) {
  194. // outline decode allowing the &dm expression to be inlined into the caller.
  195. // This moves the heap allocation into the caller and if the caller doesn't
  196. // leak dm the compiler will use a stack allocation instead.
  197. // If you do not outline this &dm always heap allocate since the pointer is
  198. // returned which cause a heap allocation because Decode's stack frame is
  199. // about to disapear.
  200. dm, err := decode(buf)
  201. if err != nil {
  202. return nil, err
  203. }
  204. return &dm, nil
  205. }
  206. func decode(buf []byte) (dm DecodedMultihash, err error) {
  207. rlen, code, hdig, err := readMultihashFromBuf(buf)
  208. if err != nil {
  209. return DecodedMultihash{}, err
  210. }
  211. dm = DecodedMultihash{
  212. Code: code,
  213. Name: Codes[code],
  214. Length: len(hdig),
  215. Digest: hdig,
  216. }
  217. if len(buf) != rlen {
  218. return dm, ErrInconsistentLen{dm, rlen}
  219. }
  220. return dm, nil
  221. }
  222. // Encode a hash digest along with the specified function code.
  223. // Note: the length is derived from the length of the digest itself.
  224. //
  225. // The error return is legacy; it is always nil.
  226. func Encode(buf []byte, code uint64) ([]byte, error) {
  227. // FUTURE: this function always causes heap allocs... but when used, this value is almost always going to be appended to another buffer (either as part of CID creation, or etc) -- should this whole function be rethought and alternatives offered?
  228. newBuf := make([]byte, varint.UvarintSize(code)+varint.UvarintSize(uint64(len(buf)))+len(buf))
  229. n := varint.PutUvarint(newBuf, code)
  230. n += varint.PutUvarint(newBuf[n:], uint64(len(buf)))
  231. copy(newBuf[n:], buf)
  232. return newBuf, nil
  233. }
  234. // EncodeName is like Encode() but providing a string name
  235. // instead of a numeric code. See Names for allowed values.
  236. func EncodeName(buf []byte, name string) ([]byte, error) {
  237. return Encode(buf, Names[name])
  238. }
  239. // readMultihashFromBuf reads a multihash from the given buffer, returning the
  240. // individual pieces of the multihash.
  241. // Note: the returned digest is a slice over the passed in data and should be
  242. // copied if the buffer will be reused
  243. func readMultihashFromBuf(buf []byte) (int, uint64, []byte, error) {
  244. initBufLength := len(buf)
  245. if initBufLength < 2 {
  246. return 0, 0, nil, ErrTooShort
  247. }
  248. var err error
  249. var code, length uint64
  250. code, buf, err = uvarint(buf)
  251. if err != nil {
  252. return 0, 0, nil, err
  253. }
  254. length, buf, err = uvarint(buf)
  255. if err != nil {
  256. return 0, 0, nil, err
  257. }
  258. if length > math.MaxInt32 {
  259. return 0, 0, nil, errors.New("digest too long, supporting only <= 2^31-1")
  260. }
  261. if int(length) > len(buf) {
  262. return 0, 0, nil, errors.New("length greater than remaining number of bytes in buffer")
  263. }
  264. // rlen is the advertised size of the CID
  265. rlen := (initBufLength - len(buf)) + int(length)
  266. return rlen, code, buf[:length], nil
  267. }
  268. // MHFromBytes reads a multihash from the given byte buffer, returning the
  269. // number of bytes read as well as the multihash
  270. func MHFromBytes(buf []byte) (int, Multihash, error) {
  271. nr, _, _, err := readMultihashFromBuf(buf)
  272. if err != nil {
  273. return 0, nil, err
  274. }
  275. return nr, Multihash(buf[:nr]), nil
  276. }