From 980018a56b9f8c3e499a1199a9923172859c4de1 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sat, 20 Apr 2019 21:50:11 +0200 Subject: [PATCH 01/12] plumbing: packfile, apply small object reading optimization also for delta objects Signed-off-by: Filip Navara --- plumbing/format/packfile/packfile.go | 84 ++++++++++++++++++++-------- 1 file changed, 61 insertions(+), 23 deletions(-) diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 69b6e85d0..4fa654ceb 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -76,10 +76,10 @@ func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) { return nil, err } - return p.GetByOffset(offset) + return p.objectAtOffset(offset, h) } -// GetByOffset retrieves the encoded object from the packfile with the given +// GetByOffset retrieves the encoded object from the packfile at the given // offset. func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) { hash, err := p.FindHash(o) @@ -89,7 +89,7 @@ func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) { } } - return p.objectAtOffset(o) + return p.objectAtOffset(o, hash) } // GetSizeByOffset retrieves the size of the encoded object from the @@ -122,6 +122,13 @@ func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) { return h, err } +func (p *Packfile) getDeltaObjectSize(buf *bytes.Buffer) int64 { + delta := buf.Bytes() + _, delta = decodeLEB128(delta) // skip src size + sz, _ := decodeLEB128(delta) + return int64(sz) +} + func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) { switch h.Type { case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: @@ -135,10 +142,7 @@ func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) { return 0, err } - delta := buf.Bytes() - _, delta = decodeLEB128(delta) // skip src size - sz, _ := decodeLEB128(delta) - return int64(sz), nil + return p.getDeltaObjectSize(buf), nil default: return 0, ErrInvalidObject.AddDetails("type %q", h.Type) } @@ -179,7 +183,7 @@ func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err return } -func (p *Packfile) objectAtOffset(offset int64) (plumbing.EncodedObject, error) { +func (p *Packfile) objectAtOffset(offset int64, hash plumbing.Hash) (plumbing.EncodedObject, error) { h, err := p.objectHeaderAtOffset(offset) if err != nil { if err == io.EOF || isInvalid(err) { @@ -194,21 +198,42 @@ func (p *Packfile) objectAtOffset(offset int64) (plumbing.EncodedObject, error) return p.getNextObject(h) } - // If the object is not a delta and it's small enough then read it - // completely into memory now since it is already read from disk - // into buffer anyway. - if h.Length <= smallObjectThreshold && h.Type != plumbing.OFSDeltaObject && h.Type != plumbing.REFDeltaObject { - return p.getNextObject(h) - } + // If the object is small enough then read it completely into memory now since + // it is already read from disk into buffer anyway. For delta objects we want + // to perform the optimization too, but we have to be careful about applying + // small deltas on big objects. + var size int64 + if h.Length <= smallObjectThreshold { + if h.Type != plumbing.OFSDeltaObject && h.Type != plumbing.REFDeltaObject { + return p.getNextObject(h) + } - hash, err := p.FindHash(h.Offset) - if err != nil { - return nil, err - } + // For delta objects we read the delta data and create a special object + // that will hold them in memory and resolve them lazily to the referenced + // object. + buf := bufPool.Get().(*bytes.Buffer) + buf.Reset() + if _, _, err := p.s.NextObject(buf); err != nil { + return nil, err + } + defer bufPool.Put(buf) - size, err := p.getObjectSize(h) - if err != nil { - return nil, err + size = p.getDeltaObjectSize(buf) + if size <= smallObjectThreshold { + var obj = new(plumbing.MemoryObject) + obj.SetSize(size) + if h.Type == plumbing.REFDeltaObject { + err = p.fillREFDeltaObjectContentWithBuffer(obj, h.Reference, buf) + } else { + err = p.fillOFSDeltaObjectContentWithBuffer(obj, h.OffsetReference, buf) + } + return obj, nil + } + } else { + size, err = p.getObjectSize(h) + if err != nil { + return nil, err + } } typ, err := p.getObjectType(h) @@ -300,6 +325,13 @@ func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plu if err != nil { return err } + defer bufPool.Put(buf) + + return p.fillREFDeltaObjectContentWithBuffer(obj, ref, buf) +} + +func (p *Packfile) fillREFDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, ref plumbing.Hash, buf *bytes.Buffer) error { + var err error base, ok := p.cacheGet(ref) if !ok { @@ -312,18 +344,24 @@ func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plu obj.SetType(base.Type()) err = ApplyDelta(obj, base, buf.Bytes()) p.cachePut(obj) - bufPool.Put(buf) return err } func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) error { buf := bytes.NewBuffer(nil) + buf.Reset() _, _, err := p.s.NextObject(buf) if err != nil { return err } + defer bufPool.Put(buf) + return p.fillOFSDeltaObjectContentWithBuffer(obj, offset, buf) +} + +func (p *Packfile) fillOFSDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, offset int64, buf *bytes.Buffer) error { + var err error var base plumbing.EncodedObject var ok bool hash, err := p.FindHash(offset) @@ -332,7 +370,7 @@ func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset } if !ok { - base, err = p.GetByOffset(offset) + base, err = p.objectAtOffset(offset, hash) if err != nil { return err } From 72b97da989cd917ed835bd3dc6a0927c8da83480 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 21 Apr 2019 20:43:20 +0200 Subject: [PATCH 02/12] Update comment Signed-off-by: Filip Navara --- plumbing/format/packfile/packfile.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 4fa654ceb..0aad3e744 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -208,9 +208,9 @@ func (p *Packfile) objectAtOffset(offset int64, hash plumbing.Hash) (plumbing.En return p.getNextObject(h) } - // For delta objects we read the delta data and create a special object - // that will hold them in memory and resolve them lazily to the referenced - // object. + // For delta objects we read the delta data and apply the small object + // optimization only if the expanded version of the object still meets + // the small object threshold condition. buf := bufPool.Get().(*bytes.Buffer) buf.Reset() if _, _, err := p.s.NextObject(buf); err != nil { From 553fba6a738578058d4afc1897bd210d90bb0474 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 22 Apr 2019 12:30:05 +0200 Subject: [PATCH 03/12] Get the buffer from pool in fillOFSDeltaObjectContent instead of creating new one Signed-off-by: Filip Navara --- plumbing/format/packfile/packfile.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 0aad3e744..3d32b364f 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -349,7 +349,7 @@ func (p *Packfile) fillREFDeltaObjectContentWithBuffer(obj plumbing.EncodedObjec } func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) error { - buf := bytes.NewBuffer(nil) + buf := bufPool.Get().(*bytes.Buffer) buf.Reset() _, _, err := p.s.NextObject(buf) if err != nil { From c215c50159f40e97a1030f7bf87b3ca00f968e54 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 23 Apr 2019 10:50:20 +0200 Subject: [PATCH 04/12] Avoid filling up the object cache from Packfile.GetByType with typ != plumbing.AnyObject Signed-off-by: Filip Navara --- plumbing/format/packfile/packfile.go | 59 ++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 3d32b364f..55755e409 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -83,10 +83,8 @@ func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) { // offset. func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) { hash, err := p.FindHash(o) - if err == nil { - if obj, ok := p.deltaBaseCache.Get(hash); ok { - return obj, nil - } + if err != nil { + return nil, err } return p.objectAtOffset(o, hash) @@ -180,10 +178,16 @@ func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err err = ErrInvalidObject.AddDetails("type %q", h.Type) } + p.offsetToType[h.Offset] = typ + return } func (p *Packfile) objectAtOffset(offset int64, hash plumbing.Hash) (plumbing.EncodedObject, error) { + if obj, ok := p.deltaBaseCache.Get(hash); ok { + return obj, nil + } + h, err := p.objectHeaderAtOffset(offset) if err != nil { if err == io.EOF || isInvalid(err) { @@ -192,6 +196,12 @@ func (p *Packfile) objectAtOffset(offset int64, hash plumbing.Hash) (plumbing.En return nil, err } + return p.getNextObjectLazy(h, hash) +} + +func (p *Packfile) getNextObjectLazy(h *ObjectHeader, hash plumbing.Hash) (plumbing.EncodedObject, error) { + var err error + // If we have no filesystem, we will return a MemoryObject instead // of an FSObject. if p.fs == nil { @@ -303,6 +313,8 @@ func (p *Packfile) getNextObject(h *ObjectHeader) (plumbing.EncodedObject, error return nil, err } + p.offsetToType[h.Offset] = obj.Type() + return obj, nil } @@ -361,19 +373,14 @@ func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset } func (p *Packfile) fillOFSDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, offset int64, buf *bytes.Buffer) error { - var err error - var base plumbing.EncodedObject - var ok bool hash, err := p.FindHash(offset) - if err == nil { - base, ok = p.cacheGet(hash) + if err != nil { + return err } - if !ok { - base, err = p.objectAtOffset(offset, hash) - if err != nil { - return err - } + base, err := p.objectAtOffset(offset, hash) + if err != nil { + return err } obj.SetType(base.Type()) @@ -475,14 +482,32 @@ func (i *objectIter) Next() (plumbing.EncodedObject, error) { return nil, err } + if i.typ != plumbing.AnyObject { + if typ, ok := i.p.offsetToType[int64(e.Offset)]; ok { + if typ != i.typ { + continue + } + } else { + h, err := i.p.objectHeaderAtOffset(int64(e.Offset)) + if err != nil { + return nil, err + } + + typ, err := i.p.getObjectType(h) + if err == nil && typ != i.typ { + continue + } + + return i.p.getNextObjectLazy(h, e.Hash) + } + } + obj, err := i.p.GetByOffset(int64(e.Offset)) if err != nil { return nil, err } - if i.typ == plumbing.AnyObject || obj.Type() == i.typ { - return obj, nil - } + return obj, nil } } From 38c061e66678b4e6b92798796973b6f9335df10e Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 23 Apr 2019 11:50:18 +0200 Subject: [PATCH 05/12] Rename functions Signed-off-by: Filip Navara --- plumbing/format/packfile/packfile.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 55755e409..5f7a74b44 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -196,16 +196,16 @@ func (p *Packfile) objectAtOffset(offset int64, hash plumbing.Hash) (plumbing.En return nil, err } - return p.getNextObjectLazy(h, hash) + return p.getNextObject(h, hash) } -func (p *Packfile) getNextObjectLazy(h *ObjectHeader, hash plumbing.Hash) (plumbing.EncodedObject, error) { +func (p *Packfile) getNextObject(h *ObjectHeader, hash plumbing.Hash) (plumbing.EncodedObject, error) { var err error // If we have no filesystem, we will return a MemoryObject instead // of an FSObject. if p.fs == nil { - return p.getNextObject(h) + return p.getNextMemoryObject(h) } // If the object is small enough then read it completely into memory now since @@ -215,7 +215,7 @@ func (p *Packfile) getNextObjectLazy(h *ObjectHeader, hash plumbing.Hash) (plumb var size int64 if h.Length <= smallObjectThreshold { if h.Type != plumbing.OFSDeltaObject && h.Type != plumbing.REFDeltaObject { - return p.getNextObject(h) + return p.getNextMemoryObject(h) } // For delta objects we read the delta data and apply the small object @@ -284,7 +284,9 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { return nil, err } - obj, err := p.getNextObject(h) + // getObjectContent is called from FSObject, so we have to explicitly + // get memory object here to avoid recursive cycle + obj, err := p.getNextMemoryObject(h) if err != nil { return nil, err } @@ -292,7 +294,7 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { return obj.Reader() } -func (p *Packfile) getNextObject(h *ObjectHeader) (plumbing.EncodedObject, error) { +func (p *Packfile) getNextMemoryObject(h *ObjectHeader) (plumbing.EncodedObject, error) { var obj = new(plumbing.MemoryObject) obj.SetSize(h.Length) obj.SetType(h.Type) @@ -498,7 +500,7 @@ func (i *objectIter) Next() (plumbing.EncodedObject, error) { continue } - return i.p.getNextObjectLazy(h, e.Hash) + return i.p.getNextObject(h, e.Hash) } } From cbcb609afe98bf45df9c6c3b49553a46f4e93a63 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 23 Apr 2019 13:16:56 +0200 Subject: [PATCH 06/12] Address PR feedback Signed-off-by: Filip Navara --- plumbing/format/packfile/packfile.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 5f7a74b44..895c1523e 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -184,7 +184,7 @@ func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err } func (p *Packfile) objectAtOffset(offset int64, hash plumbing.Hash) (plumbing.EncodedObject, error) { - if obj, ok := p.deltaBaseCache.Get(hash); ok { + if obj, ok := p.cacheGet(hash); ok { return obj, nil } @@ -237,7 +237,7 @@ func (p *Packfile) getNextObject(h *ObjectHeader, hash plumbing.Hash) (plumbing. } else { err = p.fillOFSDeltaObjectContentWithBuffer(obj, h.OffsetReference, buf) } - return obj, nil + return obj, err } } else { size, err = p.getObjectSize(h) From 809027f163d2b7732b50d9cf7bbc501cc8ed3a1b Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 23 Apr 2019 22:16:02 +0200 Subject: [PATCH 07/12] Add one more cache check to the iterator Signed-off-by: Filip Navara --- plumbing/format/packfile/packfile.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 895c1523e..a9121c4a5 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -490,13 +490,23 @@ func (i *objectIter) Next() (plumbing.EncodedObject, error) { continue } } else { + if obj, ok := i.p.cacheGet(e.Hash); ok { + if obj.Type() != i.typ { + continue + } + return obj, nil + } + h, err := i.p.objectHeaderAtOffset(int64(e.Offset)) if err != nil { return nil, err } typ, err := i.p.getObjectType(h) - if err == nil && typ != i.typ { + if err != nil { + return nil, err + } + if typ != i.typ { continue } @@ -504,7 +514,7 @@ func (i *objectIter) Next() (plumbing.EncodedObject, error) { } } - obj, err := i.p.GetByOffset(int64(e.Offset)) + obj, err := i.p.objectAtOffset(int64(e.Offset), e.Hash) if err != nil { return nil, err } From b649682ff0b1b5141120a391a21b881bd1efa32b Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 23 Apr 2019 22:59:14 +0200 Subject: [PATCH 08/12] Reduce indentation Signed-off-by: Filip Navara --- plumbing/format/packfile/packfile.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index a9121c4a5..52eb338ba 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -489,14 +489,12 @@ func (i *objectIter) Next() (plumbing.EncodedObject, error) { if typ != i.typ { continue } - } else { - if obj, ok := i.p.cacheGet(e.Hash); ok { - if obj.Type() != i.typ { - continue - } - return obj, nil + } else if obj, ok := i.p.cacheGet(e.Hash); ok { + if obj.Type() != i.typ { + continue } - + return obj, nil + } else { h, err := i.p.objectHeaderAtOffset(int64(e.Offset)) if err != nil { return nil, err From 3237897858a3a35247e7016c90cb8e906b4cd3d2 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 24 Apr 2019 09:21:19 +0200 Subject: [PATCH 09/12] Fix an error where getObjectType would seek in file and mess up the position used by getNextObject in the subsequent statement Signed-off-by: Filip Navara --- plumbing/format/packfile/packfile.go | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 52eb338ba..0dd973081 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -500,15 +500,19 @@ func (i *objectIter) Next() (plumbing.EncodedObject, error) { return nil, err } - typ, err := i.p.getObjectType(h) - if err != nil { - return nil, err - } - if typ != i.typ { - continue + if h.Type == plumbing.REFDeltaObject || h.Type == plumbing.OFSDeltaObject { + obj, err := i.p.getNextObject(h, e.Hash) + if err != nil { + return nil, err + } + if obj.Type() == i.typ { + return obj, nil + } + } else if h.Type == i.typ { + return i.p.getNextObject(h, e.Hash) } - return i.p.getNextObject(h, e.Hash) + continue } } From 20ccd734ba1a38498d268226a3aa2e5f5f80d757 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 24 Apr 2019 09:22:30 +0200 Subject: [PATCH 10/12] Remove duplicate cache lookup in getObjectContent, it is already done in FSObject Signed-off-by: Filip Navara --- plumbing/format/packfile/packfile.go | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 0dd973081..d4e601068 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -266,19 +266,6 @@ func (p *Packfile) getNextObject(h *ObjectHeader, hash plumbing.Hash) (plumbing. } func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { - ref, err := p.FindHash(offset) - if err == nil { - obj, ok := p.cacheGet(ref) - if ok { - reader, err := obj.Reader() - if err != nil { - return nil, err - } - - return reader, nil - } - } - h, err := p.objectHeaderAtOffset(offset) if err != nil { return nil, err From b0e21ee09bde46ab5002c04680885890390d3cf7 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 24 Apr 2019 09:30:00 +0200 Subject: [PATCH 11/12] Better fix for the getObjectType problem in the iterator Signed-off-by: Filip Navara --- plumbing/format/packfile/packfile.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index d4e601068..a8f3d2bed 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -488,18 +488,21 @@ func (i *objectIter) Next() (plumbing.EncodedObject, error) { } if h.Type == plumbing.REFDeltaObject || h.Type == plumbing.OFSDeltaObject { - obj, err := i.p.getNextObject(h, e.Hash) + typ, err := i.p.getObjectType(h) if err != nil { return nil, err } - if obj.Type() == i.typ { - return obj, nil + if typ != i.typ { + continue + } + // getObjectType will seek in the file so we cannot use getNextObject safely + return i.p.objectAtOffset(int64(e.Offset), e.Hash) + } else { + if h.Type != i.typ { + continue } - } else if h.Type == i.typ { return i.p.getNextObject(h, e.Hash) } - - continue } } From 78bab69f9cb3d93b256f13e8b73f223f39d3d670 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 24 Apr 2019 09:42:53 +0200 Subject: [PATCH 12/12] Populate the offsetToType cache in the iterator even for the skipped objects Signed-off-by: Filip Navara --- plumbing/format/packfile/packfile.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index a8f3d2bed..def6e997a 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -478,6 +478,7 @@ func (i *objectIter) Next() (plumbing.EncodedObject, error) { } } else if obj, ok := i.p.cacheGet(e.Hash); ok { if obj.Type() != i.typ { + i.p.offsetToType[int64(e.Offset)] = obj.Type() continue } return obj, nil @@ -493,12 +494,14 @@ func (i *objectIter) Next() (plumbing.EncodedObject, error) { return nil, err } if typ != i.typ { + i.p.offsetToType[int64(e.Offset)] = typ continue } // getObjectType will seek in the file so we cannot use getNextObject safely return i.p.objectAtOffset(int64(e.Offset), e.Hash) } else { if h.Type != i.typ { + i.p.offsetToType[int64(e.Offset)] = h.Type continue } return i.p.getNextObject(h, e.Hash)