Skip to content

Commit

Permalink
Export GetNextSize and implement ReadNext
Browse files Browse the repository at this point in the history
It is useful to be able to efficiently copy objects without
decoding them.

My use case is filtering when I already know the indices of
the objects I want to keep, and for rewriting a dictionary
of objects as a column of objects.

This commit:

1. Exports `GetNextSize`.
2. Adds a method `ReadNext(p)` to `*Reader`.

I wasn't sure about exporting `GetNextSize`, but I can't see the
harm in it, and it may be useful for finer-grained control in
the copying.

I also experimented with a `NextReader() io.Reader` method which
returned `io.LimitReader(m.R, GetNextSize())`, but for my use case
this was twice as slow and also did not handle nested objects.
In principle that might be useful for very large simple objects,
but is not included in this PR.
  • Loading branch information
pwaller committed Nov 11, 2016
1 parent ad0ff2e commit 2cdb467
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 3 deletions.
26 changes: 24 additions & 2 deletions msgp/read.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package msgp

import (
"fmt"
"io"
"math"
"sync"
Expand Down Expand Up @@ -146,6 +147,26 @@ func (m *Reader) Read(p []byte) (int, error) {
return m.R.Read(p)
}

// ReadNext reads the raw, undecoded bytes of the next object on the wire
// into p and returns the number of bytes written.
//
// GetNextSize reports a non-zero element count for maps and arrays; in that
// case the reported size covers only the container header, so ReadNext
// recurses once per trailing object to copy the complete container,
// mirroring what Skip and appendNext do with the same count.
//
// If p is too small an error is returned. Because the total size of a
// container is not known up front, the reader may already have been advanced
// past part of the object when that happens; the contents of p and the
// position of the reader are then unspecified. On any error the returned
// count is 0.
func (m *Reader) ReadNext(p []byte) (int, error) {
	sz, o, err := GetNextSize(m.R)
	if err != nil {
		return 0, err
	}
	if uintptr(len(p)) < sz {
		return 0, fmt.Errorf("p not big enough (%d < %d)", len(p), sz)
	}
	// ReadFull either fills the whole slice or returns an error, so no
	// separate short-read check is needed afterwards.
	n, err := m.R.ReadFull(p[:sz])
	if err != nil {
		return 0, err
	}
	// o is the number of objects that follow the header just copied
	// (zero for everything except maps and arrays).
	for ; o > 0; o-- {
		sub, subErr := m.ReadNext(p[n:])
		if subErr != nil {
			return 0, subErr
		}
		n += sub
	}
	return n, nil
}

// ReadFull implements `io.ReadFull`
func (m *Reader) ReadFull(p []byte) (int, error) {
return m.R.ReadFull(p)
Expand Down Expand Up @@ -194,12 +215,13 @@ func (m *Reader) IsNil() bool {
return err == nil && p[0] == mnil
}

// GetNextSize returns the size of the next object on the wire.
// returns (obj size, obj elements, error)
// only maps and arrays have non-zero obj elements
//
// use uintptr b/c it's guaranteed to be large enough
// to hold whatever we can fit in memory.
func getNextSize(r *fwd.Reader) (uintptr, uintptr, error) {
func GetNextSize(r *fwd.Reader) (uintptr, uintptr, error) {
b, err := r.Peek(1)
if err != nil {
return 0, 0, err
Expand Down Expand Up @@ -261,7 +283,7 @@ func (m *Reader) Skip() error {
return err
}
} else {
v, o, err = getNextSize(m.R)
v, o, err = GetNextSize(m.R)
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion msgp/read_bytes.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ func (r Raw) Msgsize() int {
}

func appendNext(f *Reader, d *[]byte) error {
amt, o, err := getNextSize(f.R)
amt, o, err := GetNextSize(f.R)
if err != nil {
return err
}
Expand Down

0 comments on commit 2cdb467

Please sign in to comment.