From d5ab3c5825f0e367724f2baa17f20915732f4b55 Mon Sep 17 00:00:00 2001 From: Will Date: Sat, 16 Oct 2021 16:26:42 -0700 Subject: [PATCH] forEach iterates over index in stable order (#258) * forEach iterates over index in stable order --- v2/index/index.go | 2 +- v2/index/indexsorted.go | 8 +++++++- v2/index/mhindexsorted.go | 8 +++++++- v2/index/mhindexsorted_test.go | 29 +++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 3 deletions(-) diff --git a/v2/index/index.go b/v2/index/index.go index 8e447d0f..998a17a0 100644 --- a/v2/index/index.go +++ b/v2/index/index.go @@ -87,7 +87,7 @@ type ( // An index may contain multiple offsets corresponding to the same multihash, e.g. via duplicate blocks. // In such cases, the given function may be called multiple times with the same multhihash but different offset. // - // The order of calls to the given function is entirely index-specific. + // The order of calls to the given function is deterministic, but entirely index-specific. ForEach(func(multihash.Multihash, uint64) error) error } ) diff --git a/v2/index/indexsorted.go b/v2/index/indexsorted.go index 6b6c5a68..86994dd8 100644 --- a/v2/index/indexsorted.go +++ b/v2/index/indexsorted.go @@ -235,7 +235,13 @@ func (m *multiWidthIndex) Load(items []Record) error { } func (m *multiWidthIndex) forEachDigest(f func(digest []byte, offset uint64) error) error { - for _, swi := range *m { + sizes := make([]uint32, 0, len(*m)) + for k := range *m { + sizes = append(sizes, k) + } + sort.Slice(sizes, func(i, j int) bool { return sizes[i] < sizes[j] }) + for _, s := range sizes { + swi := (*m)[s] if err := swi.forEachDigest(f); err != nil { return err } diff --git a/v2/index/mhindexsorted.go b/v2/index/mhindexsorted.go index f81e3a94..e3cae3d0 100644 --- a/v2/index/mhindexsorted.go +++ b/v2/index/mhindexsorted.go @@ -157,7 +157,13 @@ func (m *MultihashIndexSorted) GetAll(cid cid.Cid, f func(uint64) bool) error { // ForEach calls f for every multihash and its associated offset stored by this index. func (m *MultihashIndexSorted) ForEach(f func(mh multihash.Multihash, offset uint64) error) error { - for _, mwci := range *m { + sizes := make([]uint64, 0, len(*m)) + for k := range *m { + sizes = append(sizes, k) + } + sort.Slice(sizes, func(i, j int) bool { return sizes[i] < sizes[j] }) + for _, s := range sizes { + mwci := (*m)[s] if err := mwci.forEach(f); err != nil { return err } diff --git a/v2/index/mhindexsorted_test.go b/v2/index/mhindexsorted_test.go index b5ef7b89..e02ba059 100644 --- a/v2/index/mhindexsorted_test.go +++ b/v2/index/mhindexsorted_test.go @@ -46,6 +46,35 @@ func TestMultiWidthCodedIndex_MarshalUnmarshal(t *testing.T) { requireContainsAll(t, umSubject, records) } +func TestMultiWidthCodedIndex_StableIterate(t *testing.T) { + rng := rand.New(rand.NewSource(1414)) + records := generateIndexRecords(t, multihash.SHA2_256, rng) + records = append(records, generateIndexRecords(t, multihash.SHA2_512, rng)...) + records = append(records, generateIndexRecords(t, multihash.IDENTITY, rng)...) + + // Create a new mh sorted index and load randomly generated records into it. + subject, err := index.New(multicodec.CarMultihashIndexSorted) + require.NoError(t, err) + err = subject.Load(records) + require.NoError(t, err) + + iterable := subject.(index.IterableIndex) + mh := make([]multihash.Multihash, 0, len(records)) + require.NoError(t, iterable.ForEach(func(m multihash.Multihash, _ uint64) error { + mh = append(mh, m) + return nil + })) + + for i := 0; i < 10; i++ { + candidate := make([]multihash.Multihash, 0, len(records)) + require.NoError(t, iterable.ForEach(func(m multihash.Multihash, _ uint64) error { + candidate = append(candidate, m) + return nil + })) + require.Equal(t, mh, candidate) + } +} + func generateIndexRecords(t *testing.T, hasherCode uint64, rng *rand.Rand) []index.Record { var records []index.Record recordCount := rng.Intn(99) + 1 // Up to 100 records