| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322 |
- // Package multihash is the Go implementation of
- // https://github.com/multiformats/multihash, or self-describing
- // hashes.
- package multihash
- import (
- "encoding/hex"
- "errors"
- "fmt"
- "math"
- b58 "github.com/mr-tron/base58/base58"
- "github.com/multiformats/go-varint"
- )
// Sentinel errors exposed by this package.
var (
	// ErrUnknownCode is the sentinel for an unrecognized multihash function code.
	ErrUnknownCode = errors.New("unknown multihash code")

	// ErrTooShort is returned when the input holds fewer than 2 bytes.
	ErrTooShort = errors.New("multihash too short. must be >= 2 bytes")

	// ErrTooLong is the sentinel for an over-long multihash.
	ErrTooLong = errors.New("multihash too long. must be < 129 bytes")

	// ErrLenNotSupported is the sentinel for digests longer than 127 bytes.
	ErrLenNotSupported = errors.New("multihash does not yet support digests longer than 127 bytes")

	// ErrInvalidMultihash is returned when the input cannot be decoded as a
	// multihash at all (for example, when base58 decoding fails).
	ErrInvalidMultihash = errors.New("input isn't valid multihash")

	// ErrVarintBufferShort is returned when a varint cannot be read because
	// the buffer ran out.
	ErrVarintBufferShort = errors.New("uvarint: buffer too small")

	// ErrVarintTooLong is returned when a varint does not fit in 64 bits.
	ErrVarintTooLong = errors.New("uvarint: varint too big (max 64bit)")
)
- // ErrInconsistentLen is returned when a decoded multihash has an inconsistent length
- type ErrInconsistentLen struct {
- dm DecodedMultihash
- lengthFound int
- }
- func (e ErrInconsistentLen) Error() string {
- return fmt.Sprintf("multihash length inconsistent: expected %d; got %d", e.dm.Length, e.lengthFound)
- }
// Multihash function codes.
const (
	// Identity.
	IDENTITY = 0x00
	// Deprecated: use IDENTITY
	ID = IDENTITY

	// SHA-1 and SHA-2 family.
	SHA1                     = 0x11
	SHA2_256                 = 0x12
	SHA2_512                 = 0x13
	DBL_SHA2_256             = 0x56
	SHA2_256_TRUNC254_PADDED = 0x1012

	// SHA-3 family. SHA3 is an alias of SHA3_512.
	SHA3_512 = 0x14
	SHA3_384 = 0x15
	SHA3_256 = 0x16
	SHA3_224 = 0x17
	SHA3     = SHA3_512

	// SHAKE.
	SHAKE_128 = 0x18
	SHAKE_256 = 0x19

	// Keccak.
	KECCAK_224 = 0x1A
	KECCAK_256 = 0x1B
	KECCAK_384 = 0x1C
	KECCAK_512 = 0x1D

	// BLAKE3.
	BLAKE3 = 0x1E

	// BLAKE2 code ranges (inclusive). Each code in a range is registered by
	// init under a name carrying its bit size, e.g. "blake2b-8" for
	// BLAKE2B_MIN.
	BLAKE2B_MIN = 0xb201
	BLAKE2B_MAX = 0xb240
	BLAKE2S_MIN = 0xb241
	BLAKE2S_MAX = 0xb260

	// Murmur3.
	MURMUR3X64_64 = 0x22
	// Deprecated: use MURMUR3X64_64
	MURMUR3 = MURMUR3X64_64

	// Miscellaneous functions.
	MD5                       = 0xd5
	X11                       = 0x1100
	POSEIDON_BLS12_381_A1_FC1 = 0xb401
)
- func init() {
- // Add blake2b (64 codes)
- for c := uint64(BLAKE2B_MIN); c <= BLAKE2B_MAX; c++ {
- n := c - BLAKE2B_MIN + 1
- name := fmt.Sprintf("blake2b-%d", n*8)
- Names[name] = c
- Codes[c] = name
- }
- // Add blake2s (32 codes)
- for c := uint64(BLAKE2S_MIN); c <= BLAKE2S_MAX; c++ {
- n := c - BLAKE2S_MIN + 1
- name := fmt.Sprintf("blake2s-%d", n*8)
- Names[name] = c
- Codes[c] = name
- }
- }
- // Names maps the name of a hash to the code
- var Names = map[string]uint64{
- "identity": IDENTITY,
- "sha1": SHA1,
- "sha2-256": SHA2_256,
- "sha2-512": SHA2_512,
- "sha3": SHA3_512,
- "sha3-224": SHA3_224,
- "sha3-256": SHA3_256,
- "sha3-384": SHA3_384,
- "sha3-512": SHA3_512,
- "dbl-sha2-256": DBL_SHA2_256,
- "murmur3-x64-64": MURMUR3X64_64,
- "keccak-224": KECCAK_224,
- "keccak-256": KECCAK_256,
- "keccak-384": KECCAK_384,
- "keccak-512": KECCAK_512,
- "blake3": BLAKE3,
- "shake-128": SHAKE_128,
- "shake-256": SHAKE_256,
- "sha2-256-trunc254-padded": SHA2_256_TRUNC254_PADDED,
- "x11": X11,
- "md5": MD5,
- "poseidon-bls12_381-a2-fc1": POSEIDON_BLS12_381_A1_FC1,
- }
- // Codes maps a hash code to it's name
- var Codes = map[uint64]string{
- IDENTITY: "identity",
- SHA1: "sha1",
- SHA2_256: "sha2-256",
- SHA2_512: "sha2-512",
- SHA3_224: "sha3-224",
- SHA3_256: "sha3-256",
- SHA3_384: "sha3-384",
- SHA3_512: "sha3-512",
- DBL_SHA2_256: "dbl-sha2-256",
- MURMUR3X64_64: "murmur3-x64-64",
- KECCAK_224: "keccak-224",
- KECCAK_256: "keccak-256",
- KECCAK_384: "keccak-384",
- KECCAK_512: "keccak-512",
- BLAKE3: "blake3",
- SHAKE_128: "shake-128",
- SHAKE_256: "shake-256",
- SHA2_256_TRUNC254_PADDED: "sha2-256-trunc254-padded",
- X11: "x11",
- POSEIDON_BLS12_381_A1_FC1: "poseidon-bls12_381-a2-fc1",
- MD5: "md5",
- }
- // reads a varint from buf and returns bytes read.
- func uvarint(buf []byte) (uint64, []byte, error) {
- n, c, err := varint.FromUvarint(buf)
- if err != nil {
- return n, buf, err
- }
- if c == 0 {
- return n, buf, ErrVarintBufferShort
- } else if c < 0 {
- return n, buf[-c:], ErrVarintTooLong
- } else {
- return n, buf[c:], nil
- }
- }
// DecodedMultihash is the parsed form of a multihash, giving direct access to
// each of its parts.
type DecodedMultihash struct {
	// Code is the hash function code.
	Code uint64
	// Name is the name registered for Code in Codes (empty if none).
	Name string
	// Length is the digest length in bytes; it is an int because that is the
	// type returned by len().
	Length int
	// Digest is the hash function output, i.e. the bytes following the code
	// and length prefixes.
	Digest []byte
}
// Multihash is a byte slice laid out as
// <hash function code><digest size><hash function output>.
// See the multihash spec for details.
type Multihash []byte

// HexString returns the multihash bytes encoded as lowercase hexadecimal.
func (m Multihash) HexString() string {
	return hex.EncodeToString(m)
}

// String implements fmt.Stringer; it yields the same text as HexString.
func (m Multihash) String() string {
	return hex.EncodeToString(m)
}
- // FromHexString parses a hex-encoded multihash.
- func FromHexString(s string) (Multihash, error) {
- b, err := hex.DecodeString(s)
- if err != nil {
- return Multihash{}, err
- }
- return Cast(b)
- }
- // B58String returns the B58-encoded representation of a multihash.
- func (m Multihash) B58String() string {
- return b58.Encode([]byte(m))
- }
- // FromB58String parses a B58-encoded multihash.
- func FromB58String(s string) (m Multihash, err error) {
- b, err := b58.Decode(s)
- if err != nil {
- return Multihash{}, ErrInvalidMultihash
- }
- return Cast(b)
- }
- // Cast casts a buffer onto a multihash, and returns an error
- // if it does not work.
- func Cast(buf []byte) (Multihash, error) {
- _, err := Decode(buf)
- if err != nil {
- return Multihash{}, err
- }
- return Multihash(buf), nil
- }
- // Decode parses multihash bytes into a DecodedMultihash.
- func Decode(buf []byte) (*DecodedMultihash, error) {
- // outline decode allowing the &dm expression to be inlined into the caller.
- // This moves the heap allocation into the caller and if the caller doesn't
- // leak dm the compiler will use a stack allocation instead.
- // If you do not outline this &dm always heap allocate since the pointer is
- // returned which cause a heap allocation because Decode's stack frame is
- // about to disapear.
- dm, err := decode(buf)
- if err != nil {
- return nil, err
- }
- return &dm, nil
- }
- func decode(buf []byte) (dm DecodedMultihash, err error) {
- rlen, code, hdig, err := readMultihashFromBuf(buf)
- if err != nil {
- return DecodedMultihash{}, err
- }
- dm = DecodedMultihash{
- Code: code,
- Name: Codes[code],
- Length: len(hdig),
- Digest: hdig,
- }
- if len(buf) != rlen {
- return dm, ErrInconsistentLen{dm, rlen}
- }
- return dm, nil
- }
- // Encode a hash digest along with the specified function code.
- // Note: the length is derived from the length of the digest itself.
- //
- // The error return is legacy; it is always nil.
- func Encode(buf []byte, code uint64) ([]byte, error) {
- // FUTURE: this function always causes heap allocs... but when used, this value is almost always going to be appended to another buffer (either as part of CID creation, or etc) -- should this whole function be rethought and alternatives offered?
- newBuf := make([]byte, varint.UvarintSize(code)+varint.UvarintSize(uint64(len(buf)))+len(buf))
- n := varint.PutUvarint(newBuf, code)
- n += varint.PutUvarint(newBuf[n:], uint64(len(buf)))
- copy(newBuf[n:], buf)
- return newBuf, nil
- }
- // EncodeName is like Encode() but providing a string name
- // instead of a numeric code. See Names for allowed values.
- func EncodeName(buf []byte, name string) ([]byte, error) {
- return Encode(buf, Names[name])
- }
- // readMultihashFromBuf reads a multihash from the given buffer, returning the
- // individual pieces of the multihash.
- // Note: the returned digest is a slice over the passed in data and should be
- // copied if the buffer will be reused
- func readMultihashFromBuf(buf []byte) (int, uint64, []byte, error) {
- initBufLength := len(buf)
- if initBufLength < 2 {
- return 0, 0, nil, ErrTooShort
- }
- var err error
- var code, length uint64
- code, buf, err = uvarint(buf)
- if err != nil {
- return 0, 0, nil, err
- }
- length, buf, err = uvarint(buf)
- if err != nil {
- return 0, 0, nil, err
- }
- if length > math.MaxInt32 {
- return 0, 0, nil, errors.New("digest too long, supporting only <= 2^31-1")
- }
- if int(length) > len(buf) {
- return 0, 0, nil, errors.New("length greater than remaining number of bytes in buffer")
- }
- // rlen is the advertised size of the CID
- rlen := (initBufLength - len(buf)) + int(length)
- return rlen, code, buf[:length], nil
- }
- // MHFromBytes reads a multihash from the given byte buffer, returning the
- // number of bytes read as well as the multihash
- func MHFromBytes(buf []byte) (int, Multihash, error) {
- nr, _, _, err := readMultihashFromBuf(buf)
- if err != nil {
- return 0, nil, err
- }
- return nr, Multihash(buf[:nr]), nil
- }
|