Skip to content

Commit

Permalink
obj storage: optimize azure reader (pingcap#56852)
Browse files Browse the repository at this point in the history
  • Loading branch information
D3Hunter authored Oct 27, 2024
1 parent 0fca61c commit 7292117
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 18 deletions.
64 changes: 47 additions & 17 deletions br/pkg/storage/azblob.go
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,8 @@ type azblobObjectReader struct {
ctx context.Context

cpkInfo *blob.CPKInfo
// opened lazily
reader io.ReadCloser
}

// Read implement the io.Reader interface.
Expand All @@ -604,30 +606,27 @@ func (r *azblobObjectReader) Read(p []byte) (n int, err error) {
if maxCnt == 0 {
return 0, io.EOF
}
resp, err := r.blobClient.DownloadStream(r.ctx, &blob.DownloadStreamOptions{
Range: blob.HTTPRange{
Offset: r.pos,
Count: maxCnt,
},

CPKInfo: r.cpkInfo,
})
if err != nil {
return 0, errors.Annotatef(err, "Failed to read data from azure blob, data info: pos='%d', count='%d'", r.pos, maxCnt)
if r.reader == nil {
if err2 := r.reopenReader(); err2 != nil {
return 0, err2
}
}
body := resp.NewRetryReader(r.ctx, &blob.RetryReaderOptions{
MaxRetries: azblobRetryTimes,
})
n, err = body.Read(p)
buf := p[:maxCnt]
n, err = r.reader.Read(buf)
if err != nil && err != io.EOF {
return 0, errors.Annotatef(err, "Failed to read data from azure blob response, data info: pos='%d', count='%d'", r.pos, maxCnt)
}
r.pos += int64(n)
return n, body.Close()
return n, nil
}

// Close implement the io.Closer interface.
func (*azblobObjectReader) Close() error {
func (r *azblobObjectReader) Close() error {
if r.reader != nil {
err := errors.Trace(r.reader.Close())
r.reader = nil
return err
}
return nil
}

Expand All @@ -653,13 +652,44 @@ func (r *azblobObjectReader) Seek(offset int64, whence int) (int64, error) {
return 0, errors.Annotatef(berrors.ErrStorageUnknown, "Seek: invalid whence '%d'", whence)
}

if realOffset < 0 {
if realOffset < 0 || realOffset > r.totalSize {
return 0, errors.Annotatef(berrors.ErrInvalidArgument, "Seek: offset is %d, but length of content is only %d", realOffset, r.totalSize)
}
if realOffset == r.pos {
return r.pos, nil
}
r.pos = realOffset
// azblob reader can only read forward, so we need to reopen the reader
if err := r.reopenReader(); err != nil {
return 0, err
}
return r.pos, nil
}

func (r *azblobObjectReader) reopenReader() error {
if r.reader != nil {
err := errors.Trace(r.reader.Close())
if err != nil {
log.Warn("failed to close azblob reader", zap.Error(err))
}
}

resp, err := r.blobClient.DownloadStream(r.ctx, &blob.DownloadStreamOptions{
Range: blob.HTTPRange{
Offset: r.pos,
},
CPKInfo: r.cpkInfo,
})
if err != nil {
return errors.Annotatef(err, "Failed to read data from azure blob, data info: pos='%d'", r.pos)
}
body := resp.NewRetryReader(r.ctx, &blob.RetryReaderOptions{
MaxRetries: azblobRetryTimes,
})
r.reader = body
return nil
}

func (r *azblobObjectReader) GetFileSize() (int64, error) {
return r.totalSize, nil
}
Expand Down
13 changes: 12 additions & 1 deletion br/pkg/storage/azblob_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func TestAzblob(t *testing.T) {
builder := &sharedKeyAzuriteClientBuilder{}
skip, err := createContainer(ctx, builder, options.Bucket)
if skip || err != nil {
t.Log("azurite is not running, skip test")
t.Skip("azurite is not running, skip test")
return
}
require.NoError(t, err)
Expand Down Expand Up @@ -123,23 +123,34 @@ func TestAzblob(t *testing.T) {

efr, err := azblobStorage.Open(ctx, "key2", nil)
require.NoError(t, err)
size, err := efr.GetFileSize()
require.NoError(t, err)
require.EqualValues(t, 33, size)

realReader := efr.(*azblobObjectReader)
require.Nil(t, realReader.reader)

p := make([]byte, 10)
n, err := efr.Read(p)
require.NoError(t, err)
require.Equal(t, 10, n)
require.Equal(t, "data222233", string(p))
require.NotNil(t, realReader.reader)
oldInnerReader := realReader.reader

p = make([]byte, 40)
n, err = efr.Read(p)
require.NoError(t, err)
require.Equal(t, 23, n)
require.Equal(t, "46757222222222289722222", string(p[:23]))
require.Same(t, oldInnerReader, realReader.reader)

p = make([]byte, 5)
offs, err := efr.Seek(3, io.SeekStart)
require.NoError(t, err)
require.Equal(t, int64(3), offs)
// reader reopened
require.NotSame(t, oldInnerReader, realReader.reader)

n, err = efr.Read(p)
require.NoError(t, err)
Expand Down

0 comments on commit 7292117

Please sign in to comment.