Skip to content
This repository has been archived by the owner on Aug 2, 2021. It is now read-only.

Commit

Permalink
file, testutil: Add reference file hasher (#2099)
Browse files Browse the repository at this point in the history
  • Loading branch information
nolash authored Feb 24, 2020
1 parent 4f23c06 commit 15fbb9d
Show file tree
Hide file tree
Showing 9 changed files with 472 additions and 2 deletions.
66 changes: 66 additions & 0 deletions file/hasher/common_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package hasher

import (
"github.com/ethersphere/swarm/testutil"
)

// Geometry of an unencrypted, non erasure-coded swarm chunk:
// 32-byte hash sections, 128 branches per intermediate chunk.
const (
	sectionSize = 32
	branches    = 128
	chunkSize   = 4096 // sectionSize * branches
)

var (
	// dataLengths enumerates input sizes straddling section, chunk and
	// tree-level boundaries; the trailing index comments pair each entry
	// with the corresponding root hash in expected below.
	dataLengths = []int{31, // 0
		32,                     // 1
		33,                     // 2
		63,                     // 3
		64,                     // 4
		65,                     // 5
		chunkSize,              // 6
		chunkSize + 31,         // 7
		chunkSize + 32,         // 8
		chunkSize + 63,         // 9
		chunkSize + 64,         // 10
		chunkSize * 2,          // 11
		chunkSize*2 + 32,       // 12
		chunkSize * 128,        // 13
		chunkSize*128 + 31,     // 14
		chunkSize*128 + 32,     // 15
		chunkSize*128 + 64,     // 16
		chunkSize * 129,        // 17
		chunkSize * 130,        // 18
		chunkSize * 128 * 128,  // 19
		chunkSize*128*128 + 32, // 20
	}
	// expected holds the hex-encoded reference root hashes, one per entry
	// of dataLengths.
	expected = []string{
		"ece86edb20669cc60d142789d464d57bdf5e33cb789d443f608cbd81cfa5697d", // 0
		"0be77f0bb7abc9cd0abed640ee29849a3072ccfd1020019fe03658c38f087e02", // 1
		"3463b46d4f9d5bfcbf9a23224d635e51896c1daef7d225b86679db17c5fd868e", // 2
		"95510c2ff18276ed94be2160aed4e69c9116573b6f69faaeed1b426fea6a3db8", // 3
		"490072cc55b8ad381335ff882ac51303cc069cbcb8d8d3f7aa152d9c617829fe", // 4
		"541552bae05e9a63a6cb561f69edf36ffe073e441667dbf7a0e9a3864bb744ea", // 5
		"c10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef", // 6
		"91699c83ed93a1f87e326a29ccd8cc775323f9e7260035a5f014c975c5f3cd28", // 7
		"73759673a52c1f1707cbb61337645f4fcbd209cdc53d7e2cedaaa9f44df61285", // 8
		"db1313a727ffc184ae52a70012fbbf7235f551b9f2d2da04bf476abe42a3cb42", // 9
		"ade7af36ac0c7297dc1c11fd7b46981b629c6077bce75300f85b02a6153f161b", // 10
		"29a5fb121ce96194ba8b7b823a1f9c6af87e1791f824940a53b5a7efe3f790d9", // 11
		"61416726988f77b874435bdd89a419edc3861111884fd60e8adf54e2f299efd6", // 12
		"3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09", // 13
		"e5c76afa931e33ac94bce2e754b1bb6407d07f738f67856783d93934ca8fc576", // 14
		"485a526fc74c8a344c43a4545a5987d17af9ab401c0ef1ef63aefcc5c2c086df", // 15
		"624b2abb7aefc0978f891b2a56b665513480e5dc195b4a66cd8def074a6d2e94", // 16
		"b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199", // 17
		"59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", // 18
		"522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", // 19
		"ed0cc44c93b14fef2d91ab3a3674eeb6352a42ac2f0bbe524711824aae1e7bcc", // 20
	}

	// start and end select the sub-range of dataLengths exercised by the tests.
	start = 0
	end   = len(dataLengths)
)

// init prepares the shared swarm test facilities before any test in this
// package runs. NOTE(review): presumably this registers test flags/logging —
// confirm against swarm/testutil.
func init() {
	testutil.Init()
}
2 changes: 1 addition & 1 deletion file/hasher/hasher.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
// You should have received a copy of the GNU Lesser General Public License
// along with the Swarm library. If not, see <http://www.gnu.org/licenses/>.

package file
package hasher

import (
"context"
Expand Down
2 changes: 1 addition & 1 deletion file/hasher/hasher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
// You should have received a copy of the GNU Lesser General Public License
// along with the Swarm library. If not, see <http://www.gnu.org/licenses/>.

package file
package hasher

import (
"bytes"
Expand Down
56 changes: 56 additions & 0 deletions file/hasher/param.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package hasher

import (
"context"
"sync"

"github.com/ethersphere/swarm/file"
)

// treeParams defines the boundaries of the hashing job and also contains the
// hash factory function of the job.
// Setting Debug means omitting any automatic behavior (for now it means job
// processing won't auto-start).
type treeParams struct {
	SectionSize int   // bytes per hash section/reference
	Branches    int   // number of sections per chunk
	ChunkSize   int   // SectionSize * Branches
	Spans       []int // per-level span table produced by generateSpanSizes; presumably chunks covered by one reference on each level — confirm
	Debug       bool
	hashFunc    file.SectionWriterFunc
	writerPool  sync.Pool       // recycles SectionWriters created by hashFunc
	ctx         context.Context // passed to hashFunc when the pool allocates; set via SetContext
}

// newTreeParams creates a treeParams from the given hash factory function.
// A throwaway hasher is instantiated once to probe the section size and
// branch factor, which fully determine ChunkSize and the per-level span table.
func newTreeParams(hashFunc file.SectionWriterFunc) *treeParams {

	h := hashFunc(context.Background())
	p := &treeParams{
		SectionSize: h.SectionSize(),
		Branches:    h.Branches(),
		ChunkSize:   h.SectionSize() * h.Branches(),
		hashFunc:    hashFunc,
		// Default to a background context so the pool's New below never
		// passes a nil Context to hashFunc when GetWriter is called before
		// SetContext (the context package forbids nil contexts).
		ctx: context.Background(),
	}
	h.Reset()
	p.writerPool.New = func() interface{} {
		return p.hashFunc(p.ctx)
	}
	// 9 levels matches the depth limit documented on ReferenceHasher.
	p.Spans = generateSpanSizes(p.Branches, 9)
	return p
}

// SetContext stores the context that will be handed to hashFunc whenever the
// writer pool has to allocate a new SectionWriter.
func (p *treeParams) SetContext(ctx context.Context) {
	p.ctx = ctx
}

// GetContext returns the context previously stored with SetContext.
func (p *treeParams) GetContext() context.Context {
	return p.ctx
}

// PutWriter resets the writer and returns it to the pool for reuse.
func (p *treeParams) PutWriter(w file.SectionWriter) {
	w.Reset()
	p.writerPool.Put(w)
}

// GetWriter fetches a SectionWriter from the pool, allocating a fresh one via
// hashFunc (see writerPool.New) when the pool is empty.
func (p *treeParams) GetWriter() file.SectionWriter {
	return p.writerPool.Get().(file.SectionWriter)
}
145 changes: 145 additions & 0 deletions file/hasher/reference.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
package hasher

import (
"github.com/ethersphere/swarm/file"
)

// ReferenceHasher is the source-of-truth implementation of the swarm file
// hashing algorithm. It buffers one chunk's worth of pending data per tree
// level and sums bottom-up as chunk boundaries are crossed.
type ReferenceHasher struct {
	params  *treeParams
	cursors []int // section write position, indexed per level
	length  int   // number of bytes written to the data level of the hasher
	buffer  []byte // keeps data and hashes, indexed by cursors; level i's pending data lives between cursors[i+1] and cursors[i]
	counts  []int  // number of sums performed, indexed per level
	hasher  file.SectionWriter // underlying hasher
}

// NewReferenceHasher constructs and returns a new ReferenceHasher.
// This implementation is limited to a tree of 9 levels, where level 0 is the
// data level. With 32 section size and 128 branches (i.e. unencrypted,
// non erasure-coded content) this means a capacity of
// 4096 bytes * (128^(9-1)) ~ 295.148 * (10^18) bytes.
func NewReferenceHasher(params *treeParams) *ReferenceHasher {
	// maxLevels names the depth limit documented above; it replaces the
	// magic number previously repeated across the three allocations below.
	const maxLevels = 9

	// TODO: remove when bmt interface is amended
	h := params.GetWriter()
	return &ReferenceHasher{
		params:  params,
		cursors: make([]int, maxLevels),
		counts:  make([]int, maxLevels),
		buffer:  make([]byte, params.ChunkSize*maxLevels),
		hasher:  h,
	}
}

// Hash computes and returns the root hash of arbitrary data.
// The input is fed to the tree one chunk at a time; any trailing partial
// chunk is handled by hashUnfinished and moveDanglingChunk before the root
// is read out.
func (r *ReferenceHasher) Hash(data []byte) []byte {
	for offset := 0; offset < len(data); offset += r.params.ChunkSize {
		// clamp the slice end so the final (possibly short) chunk is included
		end := offset + r.params.ChunkSize
		if end > len(data) {
			end = len(data)
		}
		r.update(0, data[offset:end])
	}

	// if we didn't end on a chunk boundary we need to hash remaining chunks first
	r.hashUnfinished()

	// if the already hashed parts tree is balanced
	r.moveDanglingChunk()

	return r.digest()
}

// update writes to the data buffer on the specified level.
// It calls sum if a chunk boundary is reached, and recursively calls itself
// for the next level up with the acquired bmt hash, adjusting cursors
// accordingly.
func (r *ReferenceHasher) update(lvl int, data []byte) {
	// only data-level writes count towards the total input length
	if lvl == 0 {
		r.length += len(data)
	}
	copy(r.buffer[r.cursors[lvl]:r.cursors[lvl]+len(data)], data)
	r.cursors[lvl] += len(data)
	// cursors[lvl+1] marks where this level's pending data begins, so the
	// difference is the number of unhashed bytes buffered on this level
	if r.cursors[lvl]-r.cursors[lvl+1] == r.params.ChunkSize {
		ref := r.sum(lvl)
		// propagate the chunk reference one level up
		r.update(lvl+1, ref)
		// rewind: the summed chunk's buffer space is reused for the next chunk
		r.cursors[lvl] = r.cursors[lvl+1]
	}
}

// sum calculates and returns the bmt sum of the last written data on the
// given level, bumping that level's sum counter.
func (r *ReferenceHasher) sum(lvl int) []byte {
	r.counts[lvl]++

	// bytes of underlying data addressed by one full reference on this level
	spanBytes := r.params.Spans[lvl] * r.params.ChunkSize
	// actual data length under this sum (the final span may be partial)
	dataUnderSpan := (r.length-1)%spanBytes + 1

	// this level's pending bytes sit between the next level's cursor and ours
	from := r.cursors[lvl+1]
	to := r.cursors[lvl]

	r.hasher.Reset()
	r.hasher.SetSpan(dataUnderSpan)
	r.hasher.Write(r.buffer[from:to])
	return r.hasher.Sum(nil)
}

// digest reads out the root hash after all summing has completed.
// NOTE(review): the original comment claimed this method sums the final
// chunks of each level, but that work is done by hashUnfinished and
// moveDanglingChunk before Hash calls this; here we only return the result.
func (r *ReferenceHasher) digest() []byte {

	// the first section of the buffer will hold the root hash
	return r.buffer[:r.params.SectionSize]
}

// hashUnfinished hashes the remaining unhashed chunk at the end of the data
// level, if the input did not end on a chunk boundary, and writes the
// resulting reference to level 1.
func (r *ReferenceHasher) hashUnfinished() {
	// nothing pending when the input ended exactly on a chunk boundary
	if r.length%r.params.ChunkSize == 0 {
		return
	}
	ref := r.sum(0)
	copy(r.buffer[r.cursors[1]:], ref)
	r.cursors[1] += len(ref)
	r.cursors[0] = r.cursors[1]
}

// moveDanglingChunk: in case of a balanced tree this method concatenates the
// reference to the single reference at the highest level of the tree.
//
// Let F be full chunks (disregarding branching factor) and S be single references
// in the following scenario:
//
// S
// F F
// F F F
// F F F F S
//
// The result will be:
//
// SS
// F F
// F F F
// F F F F
//
// After which the SS will be hashed to obtain the final root hash
func (r *ReferenceHasher) moveDanglingChunk() {

	// calculate the total number of levels needed to represent the data (including the data level)
	targetLevel := getLevelsFromLength(r.length, r.params.SectionSize, r.params.Branches)

	// sum every intermediate level and write to the level above it
	for i := 1; i < targetLevel; i++ {

		// and if there is a single reference outside a balanced tree on this level
		// don't hash it again but pass it on to the next level
		if r.counts[i] > 0 {
			// TODO: simplify if possible
			// NOTE(review): counts[i-1] is the number of sums performed one
			// level below; comparing it against Spans[targetLevel-1-i] appears
			// to detect a balanced subtree with at most one dangling
			// reference — confirm against the span table semantics.
			if r.counts[i-1]-r.params.Spans[targetLevel-1-i] <= 1 {
				// shift cursors so the dangling reference is carried upward untouched
				r.cursors[i+1] = r.cursors[i]
				r.cursors[i] = r.cursors[i-1]
				continue
			}
		}

		ref := r.sum(i)
		copy(r.buffer[r.cursors[i+1]:], ref)
		r.cursors[i+1] += len(ref)
		r.cursors[i] = r.cursors[i+1]
	}
}
Loading

0 comments on commit 15fbb9d

Please sign in to comment.