From f909cfa596c7880c650ed5440df90e5474f08b29 Mon Sep 17 00:00:00 2001 From: richardlehane Date: Fri, 20 Nov 2015 16:58:19 +1100 Subject: [PATCH] lazy reading --- directory.go | 64 ++++++++++++++-------------- mscfb.go | 2 +- streams.go | 110 +++++++++++++++++++++++++++++------------------- streams_test.go | 29 ------------- 4 files changed, 100 insertions(+), 105 deletions(-) diff --git a/directory.go b/directory.go index a1517a2..0e6aeee 100644 --- a/directory.go +++ b/directory.go @@ -79,11 +79,13 @@ func makeDirEntry(b []byte) *directoryEntryFields { // File represents a MSCFB directory entry type File struct { - Name string // stream or directory name - Initial uint16 // the first character in the name (identifies special streams such as MSOLEPS property sets) - Path []string // file path - Size uint64 // size of stream - stream [][2]int64 // contains file offsets for the current stream and lengths + Name string // stream or directory name + Initial uint16 // the first character in the name (identifies special streams such as MSOLEPS property sets) + Path []string // file path + Size int64 // size of stream + i int64 // bytes read + readSector uint32 // next sector for Read + rem int64 // offset in current sector remaining previous Read *directoryEntryFields r *Reader } @@ -95,7 +97,7 @@ func (fi fileInfo) Size() int64 { if fi.objectType != stream { return 0 } - return int64(fi.File.Size) + return fi.File.Size } func (fi fileInfo) IsDir() bool { return fi.mode().IsDir() } func (fi fileInfo) ModTime() time.Time { return fi.Modified() } @@ -131,42 +133,41 @@ func (f *File) Modified() time.Time { // Read this directory entry // Returns 0, io.EOF if no stream is available (i.e. for a storage object) -func (f *File) Read(b []byte) (n int, err error) { - if f.objectType != stream || f.Size < 1 { +func (f *File) Read(b []byte) (int, error) { + if f.objectType != stream || f.Size < 1 || f.i >= f.Size { return 0, io.EOF } - // set the stream if hasn't been done yet - if f.stream == nil { - var mini bool - if f.Size < miniStreamCutoffSize { - mini = true - } - str, err := f.r.stream(f.startingSectorLoc, f.Size, mini) - if err != nil { - return 0, err - } - f.stream = str + sz := len(b) + if int64(sz) > f.Size-f.i { + sz = int(f.Size - f.i) + } + // get sectors and lengths for reads + str, err := f.stream(sz) + if err != nil { + return 0, err } - // now do the read - str, sz := f.popStream(len(b)) - var idx int64 - var i int + // now read + var idx, i int for _, v := range str { - jdx := idx + v[1] - if idx < 0 || jdx < idx || jdx > int64(len(b)) { + jdx := idx + int(v[1]) + if jdx < idx || jdx > sz { return 0, ErrRead } j, err := f.r.ra.ReadAt(b[idx:jdx], v[0]) i = i + j if err != nil { + f.i += int64(i) return i, ErrRead } - idx += v[1] + idx = jdx } - if sz < len(b) { - return sz, io.EOF + f.i += int64(i) + if i != sz { + err = ErrRead + } else if i < len(b) { + err = io.EOF } - return sz, nil + return i, err } func (r *Reader) setDirEntries() error { @@ -187,6 +188,7 @@ func (r *Reader) setDirEntries() error { f.directoryEntryFields = makeDirEntry(buf[i*128:]) if f.directoryEntryFields.objectType != unknown { fixFile(r.header.majorVersion, f) + f.readSector = f.startingSectorLoc fs = append(fs, f) } } @@ -207,9 +209,9 @@ func fixFile(v uint16, f *File) { fixName(f) // if the MSCFB major version is 4, then this can be a uint64 otherwise is a uint32 and the least signficant bits can contain junk if v > 3 { - f.Size = binary.LittleEndian.Uint64(f.streamSize[:]) + f.Size = int64(binary.LittleEndian.Uint64(f.streamSize[:])) } else { - f.Size = uint64(binary.LittleEndian.Uint32(f.streamSize[:4])) + f.Size = int64(binary.LittleEndian.Uint32(f.streamSize[:4])) } } diff --git a/mscfb.go b/mscfb.go index 3597ca7..73bb679 100644 --- a/mscfb.go +++ b/mscfb.go @@ -62,7 +62,7 @@ func fileOffset(sn uint32) int64 { const ( signature uint64 = 0xE11AB1A1E011CFD0 miniStreamSectorSize uint32 = 64 - miniStreamCutoffSize uint64 = 4096 + miniStreamCutoffSize int64 = 4096 dirEntrySize uint32 = 128 //128 bytes ) diff --git a/streams.go b/streams.go index cd58161..6020d47 100644 --- a/streams.go +++ b/streams.go @@ -62,61 +62,83 @@ func compressChain(locs [][2]int64) [][2]int64 { return locs } -func truncate(locs [][2]int64, sz uint64) [][2]int64 { - remainder := int64(len(locs))*locs[0][1] - int64(sz) - locs[len(locs)-1][1] = locs[len(locs)-1][1] - remainder - return locs -} - -func (r *Reader) stream(sn uint32, sz uint64, mini bool) ([][2]int64, error) { +// return offsets and lengths for read +func (f *File) stream(sz int) ([][2]int64, error) { + // calculate ministream and sector size + var mini bool + if f.Size < miniStreamCutoffSize { + mini = true + } var l int - var s int64 + var ss int64 if mini { - l = int(sz)/64 + 1 - s = 64 + l = sz/64 + 2 + ss = 64 } else { - l = int(uint32(sz)/sectorSize) + 1 - s = int64(sectorSize) - } - chain := make([][2]int64, 0, l) - offset, err := r.getOffset(sn, mini) - if err != nil { - return nil, err + l = sz/int(sectorSize) + 2 + ss = int64(sectorSize) } - for i := 0; i < l; i++ { - chain = append(chain, [2]int64{offset, s}) - sn, err = r.findNext(sn, mini) + + sectors := make([][2]int64, 0, l) + var i, j int + + // if we have a remainder from a previous read, use it first + if f.rem > 0 { + offset, err := f.r.getOffset(f.readSector, mini) if err != nil { return nil, err } - if sn == endOfChain { - return compressChain(truncate(chain, sz)), nil + if ss-f.rem >= int64(sz) { + sectors = append(sectors, [2]int64{offset + f.rem, int64(sz)}) + } else { + sectors = append(sectors, [2]int64{offset + f.rem, ss - f.rem}) } - offset, err = r.getOffset(sn, mini) - if err != nil { - return nil, err + if ss-f.rem <= int64(sz) { + f.rem = 0 + f.readSector, err = f.r.findNext(f.readSector, mini) + if err != nil { + return nil, err + } + j += int(ss - f.rem) + } else { + f.rem += int64(sz) + } + if sectors[0][1] == int64(sz) { + return sectors, nil + } + if f.readSector == endOfChain { + return nil, ErrRead } + i++ } - return compressChain(truncate(chain, sz)), nil -} -func (f *File) popStream(sz int) ([][2]int64, int) { - var total int64 - s := int64(sz) - for i, v := range f.stream { - total = total + v[1] - if s < total { - dif := total - s - pop := make([][2]int64, i+1) - copy(pop, f.stream[:i+1]) - pop[i][1] = pop[i][1] - dif - f.stream = f.stream[i:] - f.stream[0][0] = pop[i][0] + pop[i][1] - f.stream[0][1] = dif - return pop, sz + for { + // emergency brake! + if i >= cap(sectors) { + return nil, ErrRead + } + // grab the next offset + offset, err := f.r.getOffset(f.readSector, mini) + if err != nil { + return nil, err + } + // check if we are at the last sector + if sz-j < int(ss) { + sectors = append(sectors, [2]int64{offset, int64(sz - j)}) + f.rem = int64(sz - j) + return compressChain(sectors), nil + } else { + sectors = append(sectors, [2]int64{offset, ss}) + j += int(ss) + f.readSector, err = f.r.findNext(f.readSector, mini) + if err != nil { + return nil, err + } + // we might be at the last sector if there is no remainder, if so can return + if j == sz { + return compressChain(sectors), nil + } } + i++ } - pop := f.stream - f.stream = [][2]int64{} - return pop, int(total) } diff --git a/streams_test.go b/streams_test.go index 52e4ac4..a05d68e 100644 --- a/streams_test.go +++ b/streams_test.go @@ -30,32 +30,3 @@ func TestCompress(t *testing.T) { t.Errorf("Streams compress fail; Expecting: %v, Got: %v", br, b) } } - -func TestPopStream(t *testing.T) { - f := &File{} - f.stream = [][2]int64{[2]int64{50, 500}} - pop, sz := f.popStream(200) - if sz != 200 { - t.Errorf("Streams pop fail: expecting 200, got %d", sz) - } - if pop[0][0] != 50 && pop[0][1] != 200 { - t.Errorf("Streams pop fail: expecting 50, 200, got %d, %d", pop[0], pop[1]) - } - if f.stream[0][0] != 200 && f.stream[0][1] != 300 { - t.Errorf("Streams pop fail: expecting 200, 300, got %d, %d", f.stream[0], f.stream[1]) - } - f.stream = [][2]int64{[2]int64{50, 500}, [2]int64{1000, 600}} - pop, sz = f.popStream(600) - if sz != 600 { - t.Errorf("Streams pop fail: expecting 600, got %d", sz) - } - if pop[0][0] != 50 && pop[0][1] != 500 { - t.Errorf("Streams pop fail: expecting 50, 500, got %d, %d", pop[0], pop[1]) - } - if pop[1][1] != 1000 && pop[1][1] != 100 { - t.Errorf("Streams pop fail: expecting 1000, 100, got %d, %d", pop[0], pop[1]) - } - if f.stream[0][0] != 1100 && f.stream[0][1] != 500 { - t.Errorf("Streams pop fail: expecting 1100, 500, got %d, %d", f.stream[0], f.stream[1]) - } -}