Skip to content
This repository has been archived by the owner on Sep 11, 2020. It is now read-only.

plumbing: packfile, apply small object reading optimization also for delta objects #1121

Merged
merged 12 commits into from
Apr 24, 2019
Merged
149 changes: 107 additions & 42 deletions plumbing/format/packfile/packfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,20 +76,18 @@ func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) {
return nil, err
}

return p.GetByOffset(offset)
return p.objectAtOffset(offset, h)
}

// GetByOffset retrieves the encoded object from the packfile with the given
// GetByOffset retrieves the encoded object from the packfile at the given
// offset.
func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) {
hash, err := p.FindHash(o)
if err == nil {
if obj, ok := p.deltaBaseCache.Get(hash); ok {
return obj, nil
}
if err != nil {
return nil, err
}

return p.objectAtOffset(o)
return p.objectAtOffset(o, hash)
}

// GetSizeByOffset retrieves the size of the encoded object from the
Expand Down Expand Up @@ -122,6 +120,13 @@ func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) {
return h, err
}

// getDeltaObjectSize returns the object size recorded at the start of the
// delta data held in buf. The delta begins with two values read via
// decodeLEB128: the first is the source size, which is discarded, and the
// second is the size returned here.
func (p *Packfile) getDeltaObjectSize(buf *bytes.Buffer) int64 {
	// Consume the source-size field and keep whatever bytes follow it.
	_, rest := decodeLEB128(buf.Bytes())

	// The next field is the size we report; the trailing bytes are not needed.
	objSize, _ := decodeLEB128(rest)

	return int64(objSize)
}

func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) {
switch h.Type {
case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
Expand All @@ -135,10 +140,7 @@ func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) {
return 0, err
}

delta := buf.Bytes()
_, delta = decodeLEB128(delta) // skip src size
sz, _ := decodeLEB128(delta)
return int64(sz), nil
return p.getDeltaObjectSize(buf), nil
default:
return 0, ErrInvalidObject.AddDetails("type %q", h.Type)
}
Expand Down Expand Up @@ -176,10 +178,16 @@ func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err
err = ErrInvalidObject.AddDetails("type %q", h.Type)
}

p.offsetToType[h.Offset] = typ

return
}

func (p *Packfile) objectAtOffset(offset int64) (plumbing.EncodedObject, error) {
func (p *Packfile) objectAtOffset(offset int64, hash plumbing.Hash) (plumbing.EncodedObject, error) {
if obj, ok := p.cacheGet(hash); ok {
return obj, nil
}

h, err := p.objectHeaderAtOffset(offset)
if err != nil {
if err == io.EOF || isInvalid(err) {
Expand All @@ -188,27 +196,54 @@ func (p *Packfile) objectAtOffset(offset int64) (plumbing.EncodedObject, error)
return nil, err
}

return p.getNextObject(h, hash)
}

func (p *Packfile) getNextObject(h *ObjectHeader, hash plumbing.Hash) (plumbing.EncodedObject, error) {
var err error

// If we have no filesystem, we will return a MemoryObject instead
// of an FSObject.
if p.fs == nil {
return p.getNextObject(h)
return p.getNextMemoryObject(h)
}

// If the object is not a delta and it's small enough then read it
// completely into memory now since it is already read from disk
// into buffer anyway.
if h.Length <= smallObjectThreshold && h.Type != plumbing.OFSDeltaObject && h.Type != plumbing.REFDeltaObject {
return p.getNextObject(h)
}
// If the object is small enough then read it completely into memory now since
// it is already read from disk into buffer anyway. For delta objects we want
// to perform the optimization too, but we have to be careful about applying
// small deltas on big objects.
var size int64
if h.Length <= smallObjectThreshold {
if h.Type != plumbing.OFSDeltaObject && h.Type != plumbing.REFDeltaObject {
return p.getNextMemoryObject(h)
}

hash, err := p.FindHash(h.Offset)
if err != nil {
return nil, err
}
// For delta objects we read the delta data and apply the small object
// optimization only if the expanded version of the object still meets
// the small object threshold condition.
buf := bufPool.Get().(*bytes.Buffer)
buf.Reset()
if _, _, err := p.s.NextObject(buf); err != nil {
return nil, err
}
defer bufPool.Put(buf)

size, err := p.getObjectSize(h)
if err != nil {
return nil, err
size = p.getDeltaObjectSize(buf)
if size <= smallObjectThreshold {
var obj = new(plumbing.MemoryObject)
obj.SetSize(size)
if h.Type == plumbing.REFDeltaObject {
err = p.fillREFDeltaObjectContentWithBuffer(obj, h.Reference, buf)
} else {
err = p.fillOFSDeltaObjectContentWithBuffer(obj, h.OffsetReference, buf)
}
return obj, err
}
} else {
size, err = p.getObjectSize(h)
if err != nil {
return nil, err
}
}

typ, err := p.getObjectType(h)
Expand Down Expand Up @@ -249,15 +284,17 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) {
return nil, err
}

obj, err := p.getNextObject(h)
// getObjectContent is called from FSObject, so we have to explicitly
// get memory object here to avoid recursive cycle
obj, err := p.getNextMemoryObject(h)
if err != nil {
return nil, err
}

return obj.Reader()
}

func (p *Packfile) getNextObject(h *ObjectHeader) (plumbing.EncodedObject, error) {
func (p *Packfile) getNextMemoryObject(h *ObjectHeader) (plumbing.EncodedObject, error) {
var obj = new(plumbing.MemoryObject)
obj.SetSize(h.Length)
obj.SetType(h.Type)
Expand All @@ -278,6 +315,8 @@ func (p *Packfile) getNextObject(h *ObjectHeader) (plumbing.EncodedObject, error
return nil, err
}

p.offsetToType[h.Offset] = obj.Type()

return obj, nil
}

Expand All @@ -300,6 +339,13 @@ func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plu
if err != nil {
return err
}
defer bufPool.Put(buf)

return p.fillREFDeltaObjectContentWithBuffer(obj, ref, buf)
}

func (p *Packfile) fillREFDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, ref plumbing.Hash, buf *bytes.Buffer) error {
var err error

base, ok := p.cacheGet(ref)
if !ok {
Expand All @@ -312,30 +358,31 @@ func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plu
obj.SetType(base.Type())
err = ApplyDelta(obj, base, buf.Bytes())
p.cachePut(obj)
bufPool.Put(buf)

return err
}

func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) error {
buf := bytes.NewBuffer(nil)
buf := bufPool.Get().(*bytes.Buffer)
buf.Reset()
_, _, err := p.s.NextObject(buf)
if err != nil {
return err
}
defer bufPool.Put(buf)

return p.fillOFSDeltaObjectContentWithBuffer(obj, offset, buf)
}

var base plumbing.EncodedObject
var ok bool
func (p *Packfile) fillOFSDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, offset int64, buf *bytes.Buffer) error {
hash, err := p.FindHash(offset)
if err == nil {
base, ok = p.cacheGet(hash)
if err != nil {
return err
}

if !ok {
base, err = p.GetByOffset(offset)
if err != nil {
return err
}
base, err := p.objectAtOffset(offset, hash)
if err != nil {
return err
}

obj.SetType(base.Type())
Expand Down Expand Up @@ -437,14 +484,32 @@ func (i *objectIter) Next() (plumbing.EncodedObject, error) {
return nil, err
}

if i.typ != plumbing.AnyObject {
if typ, ok := i.p.offsetToType[int64(e.Offset)]; ok {
if typ != i.typ {
continue
}
} else {
h, err := i.p.objectHeaderAtOffset(int64(e.Offset))
if err != nil {
return nil, err
}

typ, err := i.p.getObjectType(h)
if err == nil && typ != i.typ {
continue
}

return i.p.getNextObject(h, e.Hash)
}
}

obj, err := i.p.GetByOffset(int64(e.Offset))
if err != nil {
return nil, err
}

if i.typ == plumbing.AnyObject || obj.Type() == i.typ {
return obj, nil
}
return obj, nil
}
}

Expand Down