-
Notifications
You must be signed in to change notification settings - Fork 3.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
chunks: decode varints directly from byte buffer; stop panicking on some corrupt inputs #7264
Changes from all commits
da65b11
dc4c78d
4c84b0b
3168c85
9ce2380
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
@@ -1,7 +1,6 @@ | ||||||||
package chunkenc | ||||||||
|
||||||||
import ( | ||||||||
"bufio" | ||||||||
"bytes" | ||||||||
"context" | ||||||||
"encoding/binary" | ||||||||
|
@@ -1114,12 +1113,14 @@ type bufferedIterator struct { | |||||||
origBytes []byte | ||||||||
stats *stats.Context | ||||||||
|
||||||||
bufReader *bufio.Reader | ||||||||
reader io.Reader | ||||||||
pool ReaderPool | ||||||||
reader io.Reader | ||||||||
pool ReaderPool | ||||||||
|
||||||||
err error | ||||||||
|
||||||||
readBuf [20]byte // Enough bytes to store two varints. | ||||||||
readBufValid int // How many bytes are left in readBuf from previous read. | ||||||||
|
||||||||
buf []byte // The buffer for a single entry. | ||||||||
currLine []byte // the current line, this is the same as the buffer but sliced to the line size. | ||||||||
currTs int64 | ||||||||
|
@@ -1134,7 +1135,6 @@ func newBufferedIterator(ctx context.Context, pool ReaderPool, b []byte) *buffer | |||||||
stats: stats, | ||||||||
origBytes: b, | ||||||||
reader: nil, // will be initialized later | ||||||||
bufReader: nil, // will be initialized later | ||||||||
pool: pool, | ||||||||
} | ||||||||
} | ||||||||
|
@@ -1146,8 +1146,12 @@ func (si *bufferedIterator) Next() bool { | |||||||
|
||||||||
if !si.closed && si.reader == nil { | ||||||||
// initialize reader now, hopefully reusing one of the previous readers | ||||||||
si.reader = si.pool.GetReader(bytes.NewBuffer(si.origBytes)) | ||||||||
si.bufReader = BufReaderPool.Get(si.reader) | ||||||||
var err error | ||||||||
si.reader, err = si.pool.GetReader(bytes.NewBuffer(si.origBytes)) | ||||||||
if err != nil { | ||||||||
si.err = err | ||||||||
return false | ||||||||
} | ||||||||
} | ||||||||
|
||||||||
ts, line, ok := si.moveNext() | ||||||||
|
@@ -1166,22 +1170,30 @@ func (si *bufferedIterator) Next() bool { | |||||||
|
||||||||
// moveNext moves the buffer to the next entry | ||||||||
func (si *bufferedIterator) moveNext() (int64, []byte, bool) { | ||||||||
ts, err := binary.ReadVarint(si.bufReader) | ||||||||
if err != nil { | ||||||||
if err != io.EOF { | ||||||||
si.err = err | ||||||||
} | ||||||||
return 0, nil, false | ||||||||
} | ||||||||
|
||||||||
l, err := binary.ReadUvarint(si.bufReader) | ||||||||
if err != nil { | ||||||||
if err != io.EOF { | ||||||||
si.err = err | ||||||||
return 0, nil, false | ||||||||
var ts int64 | ||||||||
var tWidth, lWidth, lineSize, lastAttempt int | ||||||||
for lWidth == 0 { // Read until both varints have enough bytes. | ||||||||
n, err := si.reader.Read(si.readBuf[si.readBufValid:]) | ||||||||
si.readBufValid += n | ||||||||
if err != nil { | ||||||||
if err != io.EOF { | ||||||||
si.err = err | ||||||||
return 0, nil, false | ||||||||
} | ||||||||
if si.readBufValid == 0 { // Got EOF and no data in the buffer. | ||||||||
return 0, nil, false | ||||||||
} | ||||||||
if si.readBufValid == lastAttempt { // Got EOF and could not parse same data last time. | ||||||||
si.err = fmt.Errorf("invalid data in chunk") | ||||||||
return 0, nil, false | ||||||||
} | ||||||||
} | ||||||||
var l uint64 | ||||||||
ts, tWidth = binary.Varint(si.readBuf[:si.readBufValid]) | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 💯 |
||||||||
l, lWidth = binary.Uvarint(si.readBuf[tWidth:si.readBufValid]) | ||||||||
lineSize = int(l) | ||||||||
lastAttempt = si.readBufValid | ||||||||
} | ||||||||
lineSize := int(l) | ||||||||
|
||||||||
if lineSize >= maxLineLength { | ||||||||
si.err = fmt.Errorf("line too long %d, maximum %d", lineSize, maxLineLength) | ||||||||
|
@@ -1199,19 +1211,25 @@ func (si *bufferedIterator) moveNext() (int64, []byte, bool) { | |||||||
return 0, nil, false | ||||||||
} | ||||||||
} | ||||||||
si.buf = si.buf[:lineSize] | ||||||||
// Take however many bytes are left in the read buffer. | ||||||||
n := copy(si.buf, si.readBuf[tWidth+lWidth:si.readBufValid]) | ||||||||
// Shift down what is still left in the fixed-size read buffer, if any. | ||||||||
si.readBufValid = copy(si.readBuf[:], si.readBuf[tWidth+lWidth+n:si.readBufValid]) | ||||||||
Comment on lines
+1217
to
+1218
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this is impossible, since
Adjusting
Suggested change
Again, I think this is impossible, but this will make the code a little clearer. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Suppose the line was of length 0 bytes, or anything up to about 10. Then There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, yes this makes sense. I definitely missed it on my first pass; thanks for the help and great PR! |
||||||||
|
||||||||
// Then process reading the line. | ||||||||
n, err := si.bufReader.Read(si.buf[:lineSize]) | ||||||||
if err != nil && err != io.EOF { | ||||||||
si.err = err | ||||||||
return 0, nil, false | ||||||||
} | ||||||||
for n < lineSize { | ||||||||
r, err := si.bufReader.Read(si.buf[n:lineSize]) | ||||||||
if err != nil && err != io.EOF { | ||||||||
r, err := si.reader.Read(si.buf[n:lineSize]) | ||||||||
n += r | ||||||||
if err != nil { | ||||||||
// We might get EOF after reading enough bytes to fill the buffer, which is OK. | ||||||||
// EOF and zero bytes read when the buffer isn't full is an error. | ||||||||
if err == io.EOF && r != 0 { | ||||||||
continue | ||||||||
} | ||||||||
si.err = err | ||||||||
return 0, nil, false | ||||||||
} | ||||||||
n += r | ||||||||
} | ||||||||
return ts, si.buf[:lineSize], true | ||||||||
} | ||||||||
|
@@ -1231,10 +1249,6 @@ func (si *bufferedIterator) close() { | |||||||
si.pool.PutReader(si.reader) | ||||||||
si.reader = nil | ||||||||
} | ||||||||
if si.bufReader != nil { | ||||||||
BufReaderPool.Put(si.bufReader) | ||||||||
si.bufReader = nil | ||||||||
} | ||||||||
|
||||||||
if si.buf != nil { | ||||||||
BytesBufferPool.Put(si.buf) | ||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I realise this can loop forever on some corrupt inputs - if there are some bytes in the buffer but not enough to decode two varints. Need to detect this somehow and error.