From c00b3525a6a1c4cead9cc26b8d453da4b0e85e52 Mon Sep 17 00:00:00 2001 From: A A <43576696+mocurin@users.noreply.github.com> Date: Fri, 1 Sep 2023 04:09:23 +0300 Subject: [PATCH] uint32 to uint64 in slice methods (#323) ## Problem I'm currently on Dgraph 21.12 and unable to export my data. Export fails on just two nodes out of 7 with the "Unexpected EOF" error: ``` Dec 14 11:31:51 dm-dgraph-04 dgraph[224355]: I1214 11:31:51.840130 224355 log.go:34] Export [01h26m22s] Scan (12): ~2.1 TiB/2.5 TiB at 177 MiB/sec. Sent: 801.5 GiB at 231 MiB/sec. jemalloc: 7.5 GiB Dec 14 11:31:55 dm-dgraph-04 dgraph[224355]: W1214 11:31:55.408201 224355 log.go:36] Error while sending: unexpected EOF ``` Skipping the rather long investigation of this issue, I found that the length of a slice written to the Buffer during export exceeds the maximum value of uint32 (I decoded the varint before the Value field in the Badger KV struct with RDFs and got something around 4.5 GB, which is expected for a rather bloated reverse edge to one of the most common nodes in my DB. Also, a count query returns 72 105 794 connected nodes, which is, well, quite a lot). Not to mention that working with `int`, which is almost always `int64`, and then casually casting it to `uint32` without any checks or warnings is as bad as it gets. ## Solution Find every `4` and `Uint32` and carefully replace them with `8` and `Uint64`. As this happens only in slice-related methods, the fix is quite easy. Tests run fine locally, but I had to patch the sort test to accommodate the size change. I also tested the badger version used by 21.12, and the tests pass there too. 
--- z/buffer.go | 16 ++++++++-------- z/buffer_test.go | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/z/buffer.go b/z/buffer.go index fc363b89..a662d47c 100644 --- a/z/buffer.go +++ b/z/buffer.go @@ -253,8 +253,8 @@ func (b *Buffer) AllocateOffset(n int) int { } func (b *Buffer) writeLen(sz int) { - buf := b.Allocate(4) - binary.BigEndian.PutUint32(buf, uint32(sz)) + buf := b.Allocate(8) + binary.BigEndian.PutUint64(buf, uint64(sz)) } // SliceAllocate would encode the size provided into the buffer, followed by a call to Allocate, @@ -262,7 +262,7 @@ func (b *Buffer) writeLen(sz int) { // this big buffer. // Note that SliceAllocate should NOT be mixed with normal calls to Write. func (b *Buffer) SliceAllocate(sz int) []byte { - b.Grow(4 + sz) + b.Grow(8 + sz) b.writeLen(sz) return b.Allocate(sz) } @@ -394,7 +394,7 @@ func (s *sortHelper) merge(left, right []byte, start, end int) { rs = rawSlice(right) // We skip the first 4 bytes in the rawSlice, because that stores the length. - if s.less(ls[4:], rs[4:]) { + if s.less(ls[8:], rs[8:]) { copyLeft() } else { copyRight() @@ -467,8 +467,8 @@ func (b *Buffer) SortSliceBetween(start, end int, less LessFunc) { } func rawSlice(buf []byte) []byte { - sz := binary.BigEndian.Uint32(buf) - return buf[:4+int(sz)] + sz := binary.BigEndian.Uint64(buf) + return buf[:8+int(sz)] } // Slice would return the slice written at offset. 
@@ -477,8 +477,8 @@ func (b *Buffer) Slice(offset int) ([]byte, int) { return nil, -1 } - sz := binary.BigEndian.Uint32(b.buf[offset:]) - start := offset + 4 + sz := binary.BigEndian.Uint64(b.buf[offset:]) + start := offset + 8 next := start + int(sz) res := b.buf[start:next] if next >= int(b.offset) { diff --git a/z/buffer_test.go b/z/buffer_test.go index 4e67cdb9..361bda73 100644 --- a/z/buffer_test.go +++ b/z/buffer_test.go @@ -219,8 +219,8 @@ func TestBufferSort(t *testing.T) { } test := func(start, end int) { - start = buf.StartOffset() + 12*start - end = buf.StartOffset() + 12*end + start = buf.StartOffset() + 16*start + end = buf.StartOffset() + 16*end buf.SortSliceBetween(start, end, func(ls, rs []byte) bool { lhs := binary.BigEndian.Uint64(ls) rhs := binary.BigEndian.Uint64(rs) @@ -238,7 +238,7 @@ func TestBufferSort(t *testing.T) { last = uid count++ } - require.Equal(t, (end-start)/12, count) + require.Equal(t, (end-start)/16, count) } for i := 10; i <= N; i += 10 { test(i-10, i)