Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a ReaderAt instead of an in-memory cache to resolve deltas #160

Merged
merged 1 commit into from
Oct 11, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 83 additions & 37 deletions git/indexpack.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"io"
"io/ioutil"
"log"
"os"
"sort"
"sync"
Expand Down Expand Up @@ -45,7 +46,7 @@ type IndexPackOptions struct {
}

type PackfileIndex interface {
GetObject(i io.ReadSeeker, s Sha1) (GitObject, error)
GetObject(i io.ReaderAt, s Sha1) (GitObject, error)
HasObject(s Sha1) bool
WriteIndex(w io.Writer) error
GetTrailer() (Packfile Sha1, Index Sha1)
Expand Down Expand Up @@ -123,17 +124,21 @@ func (idx PackfileIndexV2) WriteIndex(w io.Writer) error {

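// getObjectAtOffset uses the index to retrieve the object that starts at byte
// offset `offset` of the packfile represented by r. When metaOnly is true the
// entry data is only read for delta objects (OBJ_OFS_DELTA / OBJ_REF_DELTA).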
func (idx PackfileIndexV2) getObjectAtOffset(r io.ReadSeeker, offset int64, metaOnly bool) (GitObject, error) {
func (idx PackfileIndexV2) getObjectAtOffset(r io.ReaderAt, offset int64, metaOnly bool) (GitObject, error) {
var p PackfileHeader

if _, err := r.Seek(offset, io.SeekStart); err != nil {
return nil, err
}

t, sz, ref, refoffset, _ := p.ReadHeaderSize(r)
// 4k should be enough for the header.
metareader := io.NewSectionReader(r, offset, 4096)
t, sz, ref, refoffset, rawheader := p.ReadHeaderSize(metareader)
var rawdata []byte
// sz is the uncompressed size, so the compressed data should normally
// occupy fewer than sz bytes. It might theoretically need a little more,
// so we are very generous and allow 3*sz: this limit doesn't allocate
// anything, it only determines how much data the SectionReader will
// serve before returning an EOF.
datareader := io.NewSectionReader(r, offset+int64(len(rawheader)), int64(sz*3))
if !metaOnly || t == OBJ_OFS_DELTA || t == OBJ_REF_DELTA {
rawdata = p.readEntryDataStream1(r)
rawdata = p.readEntryDataStream1(datareader)
}

// The way we calculate the hash changes based on if it's a delta
Expand Down Expand Up @@ -220,11 +225,10 @@ func (idx PackfileIndexV2) getObjectAtOffset(r io.ReadSeeker, offset int64, meta
default:
return nil, fmt.Errorf("Unhandled object type.")
}

}

// Find the object in the table.
func (idx PackfileIndexV2) GetObjectMetadata(r io.ReadSeeker, s Sha1) (GitObject, error) {
func (idx PackfileIndexV2) GetObjectMetadata(r io.ReaderAt, s Sha1) (GitObject, error) {
foundIdx := -1
startIdx := idx.Fanout[s[0]]

Expand All @@ -238,7 +242,7 @@ func (idx PackfileIndexV2) GetObjectMetadata(r io.ReadSeeker, s Sha1) (GitObject
}
}
if foundIdx == -1 {
return nil, fmt.Errorf("Object not found")
return nil, fmt.Errorf("Object not found: %v", s)
}

var offset int64
Expand All @@ -255,7 +259,7 @@ func (idx PackfileIndexV2) GetObjectMetadata(r io.ReadSeeker, s Sha1) (GitObject
return idx.getObjectAtOffset(r, offset, true)
}

func (idx PackfileIndexV2) GetObject(r io.ReadSeeker, s Sha1) (GitObject, error) {
func (idx PackfileIndexV2) GetObject(r io.ReaderAt, s Sha1) (GitObject, error) {
foundIdx := -1
startIdx := idx.Fanout[s[0]]
if startIdx <= 0 {
Expand All @@ -275,7 +279,7 @@ func (idx PackfileIndexV2) GetObject(r io.ReadSeeker, s Sha1) (GitObject, error)
}
}
if foundIdx == -1 {
return nil, fmt.Errorf("Object not found")
return nil, fmt.Errorf("Object not found: %v", s)
}

var offset int64
Expand All @@ -292,7 +296,7 @@ func (idx PackfileIndexV2) GetObject(r io.ReadSeeker, s Sha1) (GitObject, error)
return idx.getObjectAtOffset(r, offset, false)
}

func getPackFileObject(idx io.Reader, packfile io.ReadSeeker, s Sha1, metaOnly bool) (GitObject, error) {
func getPackFileObject(idx io.Reader, packfile io.ReaderAt, s Sha1, metaOnly bool) (GitObject, error) {
var pack PackfileIndexV2
if err := binary.Read(idx, binary.BigEndian, &pack.magic); err != nil {
return nil, err
Expand Down Expand Up @@ -500,8 +504,7 @@ func IndexPack(c *Client, opts IndexPackOptions, r io.Reader) (idx PackfileIndex
indexfile.Sha1Table = make([]Sha1, p.Size)
indexfile.CRC32 = make([]uint32, p.Size)
indexfile.FourByteOffsets = make([]uint32, p.Size)
ofsChains := make(map[ObjectOffset]resolvedDelta)
refChains := make(map[Sha1]resolvedDelta)
priorObjects := make(map[Sha1]ObjectOffset)

if iscopying {
// Seek past the header that was just copied.
Expand Down Expand Up @@ -551,9 +554,10 @@ func IndexPack(c *Client, opts IndexPackOptions, r io.Reader) (idx PackfileIndex

// The way we calculate the hash changes based on if it's a delta
// or not.
var sha1 Sha1
switch t {
case OBJ_COMMIT, OBJ_TREE, OBJ_BLOB:
sha1, _, err := HashSlice(t.String(), rawdata)
sha1, _, err = HashSlice(t.String(), rawdata)
if err != nil && opts.Strict {
return indexfile, err
}
Expand All @@ -567,27 +571,55 @@ func IndexPack(c *Client, opts IndexPackOptions, r io.Reader) (idx PackfileIndex
// Maintain the list of references for further chains
// to use.
mu.Lock()
refChains[sha1] = resolvedDelta{rawdata, t}
ofsChains[ObjectOffset(location)] = resolvedDelta{rawdata, t}
priorObjects[sha1] = ObjectOffset(location)
mu.Unlock()

wg.Done()
case OBJ_OFS_DELTA:
t, deltadata, err := calculateOfsDelta(ObjectOffset(location)-offset, rawdata, ofsChains)
if err != nil && opts.Strict {
return indexfile, err
}
case OBJ_REF_DELTA:
log.Printf("Resolving REF_DELTA from %v\n", ref)
mu.Lock()
ofsChains[ObjectOffset(location)] = resolvedDelta{deltadata, t}
o, ok := priorObjects[ref]
if !ok {
mu.Unlock()
panic("Could not find basis for REF_DELTA")
}
// The refs in the index file need to be sorted in
// order for GetObject to look up the other SHA1s
// when resolving deltas. Chains don't have access
// to the priorObjects map that we have here.
sort.Sort(&indexfile)
mu.Unlock()
if err != nil && opts.Strict {
return nil, err
if err != nil {
return indexfile, err
}
base, err := indexfile.getObjectAtOffset(file, int64(o), false)
if err != nil {
return indexfile, err
}
res := resolvedDelta{Value: base.GetContent()}
_, val, err := calculateDelta(res, rawdata)
if err != nil {
return indexfile, err
}
sha1, _, err := HashSlice(t.String(), deltadata)
switch base.GetType() {
case "commit":
t = OBJ_COMMIT
case "tree":
t = OBJ_TREE
case "blob":
t = OBJ_BLOB
default:
panic("Unhandled delta base type" + base.GetType())
}
sha1, _, err = HashSlice(base.GetType(), val)
if err != nil && opts.Strict {
return nil, err
}

mu.Lock()
priorObjects[sha1] = ObjectOffset(location)
mu.Unlock()

mu.Lock()
for j := int(sha1[0]); j < 256; j++ {
indexfile.Fanout[j]++
Expand All @@ -596,29 +628,43 @@ func IndexPack(c *Client, opts IndexPackOptions, r io.Reader) (idx PackfileIndex

mu.Unlock()
wg.Done()
case OBJ_REF_DELTA:
t, deltadata, err := calculateRefDelta(ref, rawdata, refChains)
if err != nil && opts.Strict {
case OBJ_OFS_DELTA:
log.Printf("Resolving OFS_DELTA from %v\n", location-int64(offset))
base, err := indexfile.getObjectAtOffset(file, location-int64(offset), false)
if err != nil {
return indexfile, err
}

res := resolvedDelta{Value: base.GetContent()}
_, val, err := calculateDelta(res, rawdata)
if err != nil {
return indexfile, err
}
switch base.GetType() {
case "commit":
t = OBJ_COMMIT
case "tree":
t = OBJ_TREE
case "blob":
t = OBJ_BLOB
default:
panic("Unhandled delta base type" + base.GetType())
}
sha1, _, err = HashSlice(base.GetType(), val)
if err != nil && opts.Strict {
return nil, err
}

sha1, _, err := HashSlice(t.String(), deltadata)
mu.Lock()
refChains[sha1] = resolvedDelta{deltadata, t}
priorObjects[sha1] = ObjectOffset(location)
mu.Unlock()

if err != nil && opts.Strict {
return nil, err
}

mu.Lock()
for j := int(sha1[0]); j < 256; j++ {
indexfile.Fanout[j]++
}
indexfile.Sha1Table[i] = sha1

mu.Unlock()
wg.Done()
default:
Expand Down
7 changes: 5 additions & 2 deletions git/packfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,13 @@ func (p PackfileHeader) ReadHeaderSize(r io.Reader) (PackEntryType, PackEntrySiz
}
switch entrytype {
case OBJ_REF_DELTA:
n, err := r.Read(refDelta)
if n != 20 || err != nil {
n, err := io.ReadFull(r, refDelta)
if err != nil {
panic(err)
}
if n != 20 {
panic(fmt.Sprintf("Could not read refDelta base. Got %v (%x) instead of 20 bytes", n, refDelta[:n]))
}
dataread = append(dataread, refDelta...)
sha, err := Sha1FromSlice(refDelta)
if err != nil {
Expand Down