Skip to content

Commit

Permalink
Merge pull request #149 from ipld/feat/configure-hash-on-load
Browse files Browse the repository at this point in the history
Add option to tell link system storage is trusted and we can skip hash on read
  • Loading branch information
warpfork committed Mar 24, 2021
2 parents 678a428 + 6a262a3 commit dc342a9
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 37 deletions.
59 changes: 22 additions & 37 deletions linking.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,50 +52,35 @@ func (lsys *LinkSystem) Fill(lnkCtx LinkContext, lnk Link, na NodeAssembler) err
return ErrLinkingSetup{"no storage configured for reading", io.ErrClosedPipe} // REVIEW: better cause?
}
// Open storage, read it, verify it, and feed the codec to assemble the nodes.
// We have two paths through this: if a `Bytes() []byte` method is handy, we'll assume it's faster than going through reader.
// These diverge significantly, because if we give up on streaming, it makes sense to do the full hash check first before decoding at all.
reader, err := lsys.StorageReadOpener(lnkCtx, lnk)
if err != nil {
return err
}
if buf, ok := reader.(interface{ Bytes() []byte }); ok {
// Flush everything to the hasher in one big slice.
hasher.Write(buf.Bytes())
hash := hasher.Sum(nil)
// Bit of a jig to get something we can do the hash equality check on.
lnk2 := lnk.Prototype().BuildLink(hash)
if lnk2 != lnk {
return ErrHashMismatch{Actual: lnk2, Expected: lnk}
}
// Perform decoding (knowing the hash is already verified).
// Note that the decoder receives the same reader as we started with,
// and as a result, is also free to detect a `Bytes() []byte` accessor and do any optimizations it wishes to based on that.
// TrustedStorage indicates the data coming out of this reader has already been hashed and verified earlier.
// As a result, we can skip rehashing it.
if lsys.TrustedStorage {
return decoder(na, reader)
} else {
// Tee the stream so that the hasher is fed as the unmarshal progresses through the stream.
// Note: the tee means *the decoder doesn't get to see the original reader type*.
// This is part of why the `Bytes() []byte` branch above is useful; the decoder loses any ability to do a similar check
// and optimization when the tee is in the middle.
tee := io.TeeReader(reader, hasher)
decodeErr := decoder(na, tee)
if decodeErr != nil { // It is important to security to check the hash before returning any other observation about the content.
// This copy is for data remaining in the block that wasn't already pulled through the TeeReader by the decoder.
_, err := io.Copy(hasher, reader)
if err != nil {
return err
}
}
hash := hasher.Sum(nil)
// Bit of a jig to get something we can do the hash equality check on.
lnk2 := lnk.Prototype().BuildLink(hash)
if lnk2 != lnk {
return ErrHashMismatch{Actual: lnk2, Expected: lnk}
}
if decodeErr != nil {
return decodeErr
}
// Tee the stream so that the hasher is fed as the unmarshal progresses through the stream.
tee := io.TeeReader(reader, hasher)
decodeErr := decoder(na, tee)
if decodeErr != nil { // It is important to security to check the hash before returning any other observation about the content.
// This copy is for data remaining in the block that wasn't already pulled through the TeeReader by the decoder.
_, err := io.Copy(hasher, reader)
if err != nil {
return err
}
return nil
}
hash := hasher.Sum(nil)
// Bit of a jig to get something we can do the hash equality check on.
lnk2 := lnk.Prototype().BuildLink(hash)
if lnk2 != lnk {
return ErrHashMismatch{Actual: lnk2, Expected: lnk}
}
if decodeErr != nil {
return decodeErr
}
return nil
}

func (lsys *LinkSystem) MustFill(lnkCtx LinkContext, lnk Link, na NodeAssembler) {
Expand Down
1 change: 1 addition & 0 deletions linksystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ type LinkSystem struct {
HasherChooser func(LinkPrototype) (hash.Hash, error)
StorageWriteOpener BlockWriteOpener
StorageReadOpener BlockReadOpener
TrustedStorage bool
}

// The following two types define the two directions of transform that a codec can be expected to perform:
Expand Down

0 comments on commit dc342a9

Please sign in to comment.