Skip to content

Commit

Permalink
lazy reading
Browse files Browse the repository at this point in the history
  • Loading branch information
richardlehane committed Nov 20, 2015
1 parent 9c05c69 commit f909cfa
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 105 deletions.
64 changes: 33 additions & 31 deletions directory.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,13 @@ func makeDirEntry(b []byte) *directoryEntryFields {

// File represents a MSCFB directory entry
type File struct {
Name string // stream or directory name
Initial uint16 // the first character in the name (identifies special streams such as MSOLEPS property sets)
Path []string // file path
Size uint64 // size of stream
stream [][2]int64 // contains file offsets for the current stream and lengths
Name string // stream or directory name
Initial uint16 // the first character in the name (identifies special streams such as MSOLEPS property sets)
Path []string // file path
Size int64 // size of stream
i int64 // bytes read
readSector uint32 // next sector for Read
rem int64 // offset within the current sector left over from the previous Read
*directoryEntryFields
r *Reader
}
Expand All @@ -95,7 +97,7 @@ func (fi fileInfo) Size() int64 {
if fi.objectType != stream {
return 0
}
return int64(fi.File.Size)
return fi.File.Size
}
func (fi fileInfo) IsDir() bool { return fi.mode().IsDir() }
func (fi fileInfo) ModTime() time.Time { return fi.Modified() }
Expand Down Expand Up @@ -131,42 +133,41 @@ func (f *File) Modified() time.Time {

// Read this directory entry
// Returns 0, io.EOF if no stream is available (i.e. for a storage object)
func (f *File) Read(b []byte) (n int, err error) {
if f.objectType != stream || f.Size < 1 {
func (f *File) Read(b []byte) (int, error) {
if f.objectType != stream || f.Size < 1 || f.i >= f.Size {
return 0, io.EOF
}
// set the stream if hasn't been done yet
if f.stream == nil {
var mini bool
if f.Size < miniStreamCutoffSize {
mini = true
}
str, err := f.r.stream(f.startingSectorLoc, f.Size, mini)
if err != nil {
return 0, err
}
f.stream = str
sz := len(b)
if int64(sz) > f.Size-f.i {
sz = int(f.Size - f.i)
}
// get sectors and lengths for reads
str, err := f.stream(sz)
if err != nil {
return 0, err
}
// now do the read
str, sz := f.popStream(len(b))
var idx int64
var i int
// now read
var idx, i int
for _, v := range str {
jdx := idx + v[1]
if idx < 0 || jdx < idx || jdx > int64(len(b)) {
jdx := idx + int(v[1])
if jdx < idx || jdx > sz {
return 0, ErrRead
}
j, err := f.r.ra.ReadAt(b[idx:jdx], v[0])
i = i + j
if err != nil {
f.i += int64(i)
return i, ErrRead
}
idx += v[1]
idx = jdx
}
if sz < len(b) {
return sz, io.EOF
f.i += int64(i)
if i != sz {
err = ErrRead
} else if i < len(b) {
err = io.EOF
}
return sz, nil
return i, err
}

func (r *Reader) setDirEntries() error {
Expand All @@ -187,6 +188,7 @@ func (r *Reader) setDirEntries() error {
f.directoryEntryFields = makeDirEntry(buf[i*128:])
if f.directoryEntryFields.objectType != unknown {
fixFile(r.header.majorVersion, f)
f.readSector = f.startingSectorLoc
fs = append(fs, f)
}
}
Expand All @@ -207,9 +209,9 @@ func fixFile(v uint16, f *File) {
fixName(f)
// if the MSCFB major version is 4, the stream size is a full uint64; otherwise it is a uint32 and the most significant 32 bits can contain junk, so only the low 4 bytes are read
if v > 3 {
f.Size = binary.LittleEndian.Uint64(f.streamSize[:])
f.Size = int64(binary.LittleEndian.Uint64(f.streamSize[:]))
} else {
f.Size = uint64(binary.LittleEndian.Uint32(f.streamSize[:4]))
f.Size = int64(binary.LittleEndian.Uint32(f.streamSize[:4]))
}
}

Expand Down
2 changes: 1 addition & 1 deletion mscfb.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func fileOffset(sn uint32) int64 {
const (
signature uint64 = 0xE11AB1A1E011CFD0
miniStreamSectorSize uint32 = 64
miniStreamCutoffSize uint64 = 4096
miniStreamCutoffSize int64 = 4096
dirEntrySize uint32 = 128 //128 bytes
)

Expand Down
110 changes: 66 additions & 44 deletions streams.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,61 +62,83 @@ func compressChain(locs [][2]int64) [][2]int64 {
return locs
}

func truncate(locs [][2]int64, sz uint64) [][2]int64 {
remainder := int64(len(locs))*locs[0][1] - int64(sz)
locs[len(locs)-1][1] = locs[len(locs)-1][1] - remainder
return locs
}

func (r *Reader) stream(sn uint32, sz uint64, mini bool) ([][2]int64, error) {
// return offsets and lengths for read
func (f *File) stream(sz int) ([][2]int64, error) {
// calculate ministream and sector size
var mini bool
if f.Size < miniStreamCutoffSize {
mini = true
}
var l int
var s int64
var ss int64
if mini {
l = int(sz)/64 + 1
s = 64
l = sz/64 + 2
ss = 64
} else {
l = int(uint32(sz)/sectorSize) + 1
s = int64(sectorSize)
}
chain := make([][2]int64, 0, l)
offset, err := r.getOffset(sn, mini)
if err != nil {
return nil, err
l = sz/int(sectorSize) + 2
ss = int64(sectorSize)
}
for i := 0; i < l; i++ {
chain = append(chain, [2]int64{offset, s})
sn, err = r.findNext(sn, mini)

sectors := make([][2]int64, 0, l)
var i, j int

// if we have a remainder from a previous read, use it first
if f.rem > 0 {
offset, err := f.r.getOffset(f.readSector, mini)
if err != nil {
return nil, err
}
if sn == endOfChain {
return compressChain(truncate(chain, sz)), nil
if ss-f.rem >= int64(sz) {
sectors = append(sectors, [2]int64{offset + f.rem, int64(sz)})
} else {
sectors = append(sectors, [2]int64{offset + f.rem, ss - f.rem})
}
offset, err = r.getOffset(sn, mini)
if err != nil {
return nil, err
if ss-f.rem <= int64(sz) {
f.rem = 0
f.readSector, err = f.r.findNext(f.readSector, mini)
if err != nil {
return nil, err
}
j += int(ss - f.rem)
} else {
f.rem += int64(sz)
}
if sectors[0][1] == int64(sz) {
return sectors, nil
}
if f.readSector == endOfChain {
return nil, ErrRead
}
i++
}
return compressChain(truncate(chain, sz)), nil
}

func (f *File) popStream(sz int) ([][2]int64, int) {
var total int64
s := int64(sz)
for i, v := range f.stream {
total = total + v[1]
if s < total {
dif := total - s
pop := make([][2]int64, i+1)
copy(pop, f.stream[:i+1])
pop[i][1] = pop[i][1] - dif
f.stream = f.stream[i:]
f.stream[0][0] = pop[i][0] + pop[i][1]
f.stream[0][1] = dif
return pop, sz
for {
// emergency brake!
if i >= cap(sectors) {
return nil, ErrRead
}
// grab the next offset
offset, err := f.r.getOffset(f.readSector, mini)
if err != nil {
return nil, err
}
// check if we are at the last sector
if sz-j < int(ss) {
sectors = append(sectors, [2]int64{offset, int64(sz - j)})
f.rem = int64(sz - j)
return compressChain(sectors), nil
} else {
sectors = append(sectors, [2]int64{offset, ss})
j += int(ss)
f.readSector, err = f.r.findNext(f.readSector, mini)
if err != nil {
return nil, err
}
// we might be at the last sector if there is no remainder, if so can return
if j == sz {
return compressChain(sectors), nil
}
}
i++
}
pop := f.stream
f.stream = [][2]int64{}
return pop, int(total)
}
29 changes: 0 additions & 29 deletions streams_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,32 +30,3 @@ func TestCompress(t *testing.T) {
t.Errorf("Streams compress fail; Expecting: %v, Got: %v", br, b)
}
}

// TestPopStream checks that popStream returns the requested number of bytes
// as (offset, length) pairs and leaves the unread remainder in f.stream.
//
// Each expectation is checked field by field with ||, so a mismatch in either
// the offset or the length is reported (the earlier && form only failed when
// both fields were wrong at once).
func TestPopStream(t *testing.T) {
	f := &File{}

	// Pop part of a single run: 200 of the 500 bytes that start at offset 50.
	f.stream = [][2]int64{{50, 500}}
	pop, sz := f.popStream(200)
	if sz != 200 {
		t.Errorf("Streams pop fail: expecting 200, got %d", sz)
	}
	if len(pop) != 1 || pop[0][0] != 50 || pop[0][1] != 200 {
		t.Errorf("Streams pop fail: expecting [[50 200]], got %v", pop)
	}
	// The remainder begins immediately after the popped bytes: 50 + 200.
	if len(f.stream) != 1 || f.stream[0][0] != 250 || f.stream[0][1] != 300 {
		t.Errorf("Streams pop fail: expecting [[250 300]], got %v", f.stream)
	}

	// Pop across a run boundary: all of the first run plus 100 bytes of the second.
	f.stream = [][2]int64{{50, 500}, {1000, 600}}
	pop, sz = f.popStream(600)
	if sz != 600 {
		t.Errorf("Streams pop fail: expecting 600, got %d", sz)
	}
	if len(pop) != 2 {
		t.Fatalf("Streams pop fail: expecting 2 runs, got %v", pop)
	}
	if pop[0][0] != 50 || pop[0][1] != 500 {
		t.Errorf("Streams pop fail: expecting 50, 500, got %d, %d", pop[0][0], pop[0][1])
	}
	if pop[1][0] != 1000 || pop[1][1] != 100 {
		t.Errorf("Streams pop fail: expecting 1000, 100, got %d, %d", pop[1][0], pop[1][1])
	}
	// 500 bytes of the second run remain, starting at 1000 + 100.
	if len(f.stream) != 1 || f.stream[0][0] != 1100 || f.stream[0][1] != 500 {
		t.Errorf("Streams pop fail: expecting [[1100 500]], got %v", f.stream)
	}
}

0 comments on commit f909cfa

Please sign in to comment.