Skip to content

Commit

Permalink
forEach iterates over index in stable order (#258)
Browse files Browse the repository at this point in the history
* forEach iterates over index in stable order
  • Loading branch information
willscott authored and rvagg committed Oct 22, 2021
1 parent 7c2e6ad commit d5ab3c5
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 3 deletions.
2 changes: 1 addition & 1 deletion v2/index/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ type (
// An index may contain multiple offsets corresponding to the same multihash, e.g. via duplicate blocks.
// In such cases, the given function may be called multiple times with the same multihash but different offset.
//
// The order of calls to the given function is entirely index-specific.
// The order of calls to the given function is deterministic, but entirely index-specific.
ForEach(func(multihash.Multihash, uint64) error) error
}
)
Expand Down
8 changes: 7 additions & 1 deletion v2/index/indexsorted.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,13 @@ func (m *multiWidthIndex) Load(items []Record) error {
}

func (m *multiWidthIndex) forEachDigest(f func(digest []byte, offset uint64) error) error {
for _, swi := range *m {
sizes := make([]uint32, 0, len(*m))
for k := range *m {
sizes = append(sizes, k)
}
sort.Slice(sizes, func(i, j int) bool { return sizes[i] < sizes[j] })
for _, s := range sizes {
swi := (*m)[s]
if err := swi.forEachDigest(f); err != nil {
return err
}
Expand Down
8 changes: 7 additions & 1 deletion v2/index/mhindexsorted.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,13 @@ func (m *MultihashIndexSorted) GetAll(cid cid.Cid, f func(uint64) bool) error {

// ForEach calls f for every multihash and its associated offset stored by this index.
func (m *MultihashIndexSorted) ForEach(f func(mh multihash.Multihash, offset uint64) error) error {
for _, mwci := range *m {
sizes := make([]uint64, 0, len(*m))
for k := range *m {
sizes = append(sizes, k)
}
sort.Slice(sizes, func(i, j int) bool { return sizes[i] < sizes[j] })
for _, s := range sizes {
mwci := (*m)[s]
if err := mwci.forEach(f); err != nil {
return err
}
Expand Down
29 changes: 29 additions & 0 deletions v2/index/mhindexsorted_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,35 @@ func TestMultiWidthCodedIndex_MarshalUnmarshal(t *testing.T) {
requireContainsAll(t, umSubject, records)
}

// TestMultiWidthCodedIndex_StableIterate checks that repeated ForEach calls on
// a loaded multihash-sorted index visit multihashes in the same order each time.
func TestMultiWidthCodedIndex_StableIterate(t *testing.T) {
	// Fixed seed: the generated record set is identical on every run.
	rng := rand.New(rand.NewSource(1414))

	// Combine records generated for three different multihash codes.
	records := generateIndexRecords(t, multihash.SHA2_256, rng)
	sha512Records := generateIndexRecords(t, multihash.SHA2_512, rng)
	records = append(records, sha512Records...)
	identityRecords := generateIndexRecords(t, multihash.IDENTITY, rng)
	records = append(records, identityRecords...)

	// Build a multihash-sorted index and populate it with the records.
	subject, err := index.New(multicodec.CarMultihashIndexSorted)
	require.NoError(t, err)
	require.NoError(t, subject.Load(records))

	iterable := subject.(index.IterableIndex)

	// collect walks the whole index once and returns the multihashes in the
	// order ForEach reported them.
	collect := func() []multihash.Multihash {
		seen := make([]multihash.Multihash, 0, len(records))
		visit := func(m multihash.Multihash, _ uint64) error {
			seen = append(seen, m)
			return nil
		}
		require.NoError(t, iterable.ForEach(visit))
		return seen
	}

	// The first pass is the reference; ten further passes must match it exactly.
	want := collect()
	for remaining := 10; remaining > 0; remaining-- {
		got := collect()
		require.Equal(t, want, got)
	}
}

func generateIndexRecords(t *testing.T, hasherCode uint64, rng *rand.Rand) []index.Record {
var records []index.Record
recordCount := rng.Intn(99) + 1 // Up to 100 records
Expand Down

0 comments on commit d5ab3c5

Please sign in to comment.