This repository has been archived by the owner on Aug 2, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 110
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
file, testutil: Add reference file hasher (#2099)
- Loading branch information
Showing
9 changed files
with
472 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
package hasher | ||
|
||
import ( | ||
"github.com/ethersphere/swarm/testutil" | ||
) | ||
|
||
// Dimension constants for the default swarm chunk layout
// (unencrypted, non erasure-coded content).
const (
	sectionSize = 32   // bytes per BMT hash section
	branches    = 128  // number of sections (children) per chunk
	chunkSize   = 4096 // sectionSize * branches
)
|
||
var ( | ||
dataLengths = []int{31, // 0 | ||
32, // 1 | ||
33, // 2 | ||
63, // 3 | ||
64, // 4 | ||
65, // 5 | ||
chunkSize, // 6 | ||
chunkSize + 31, // 7 | ||
chunkSize + 32, // 8 | ||
chunkSize + 63, // 9 | ||
chunkSize + 64, // 10 | ||
chunkSize * 2, // 11 | ||
chunkSize*2 + 32, // 12 | ||
chunkSize * 128, // 13 | ||
chunkSize*128 + 31, // 14 | ||
chunkSize*128 + 32, // 15 | ||
chunkSize*128 + 64, // 16 | ||
chunkSize * 129, // 17 | ||
chunkSize * 130, // 18 | ||
chunkSize * 128 * 128, // 19 | ||
chunkSize*128*128 + 32, // 20 | ||
} | ||
expected = []string{ | ||
"ece86edb20669cc60d142789d464d57bdf5e33cb789d443f608cbd81cfa5697d", // 0 | ||
"0be77f0bb7abc9cd0abed640ee29849a3072ccfd1020019fe03658c38f087e02", // 1 | ||
"3463b46d4f9d5bfcbf9a23224d635e51896c1daef7d225b86679db17c5fd868e", // 2 | ||
"95510c2ff18276ed94be2160aed4e69c9116573b6f69faaeed1b426fea6a3db8", // 3 | ||
"490072cc55b8ad381335ff882ac51303cc069cbcb8d8d3f7aa152d9c617829fe", // 4 | ||
"541552bae05e9a63a6cb561f69edf36ffe073e441667dbf7a0e9a3864bb744ea", // 5 | ||
"c10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef", // 6 | ||
"91699c83ed93a1f87e326a29ccd8cc775323f9e7260035a5f014c975c5f3cd28", // 7 | ||
"73759673a52c1f1707cbb61337645f4fcbd209cdc53d7e2cedaaa9f44df61285", // 8 | ||
"db1313a727ffc184ae52a70012fbbf7235f551b9f2d2da04bf476abe42a3cb42", // 9 | ||
"ade7af36ac0c7297dc1c11fd7b46981b629c6077bce75300f85b02a6153f161b", // 10 | ||
"29a5fb121ce96194ba8b7b823a1f9c6af87e1791f824940a53b5a7efe3f790d9", // 11 | ||
"61416726988f77b874435bdd89a419edc3861111884fd60e8adf54e2f299efd6", // 12 | ||
"3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09", // 13 | ||
"e5c76afa931e33ac94bce2e754b1bb6407d07f738f67856783d93934ca8fc576", // 14 | ||
"485a526fc74c8a344c43a4545a5987d17af9ab401c0ef1ef63aefcc5c2c086df", // 15 | ||
"624b2abb7aefc0978f891b2a56b665513480e5dc195b4a66cd8def074a6d2e94", // 16 | ||
"b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199", // 17 | ||
"59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", // 18 | ||
"522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", // 19 | ||
"ed0cc44c93b14fef2d91ab3a3674eeb6352a42ac2f0bbe524711824aae1e7bcc", // 20 | ||
} | ||
|
||
start = 0 | ||
end = len(dataLengths) | ||
) | ||
|
||
func init() { | ||
testutil.Init() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
package hasher | ||
|
||
import ( | ||
"context" | ||
"sync" | ||
|
||
"github.com/ethersphere/swarm/file" | ||
) | ||
|
||
// defines the boundaries of the hashing job and also contains the hash factory function of the job | ||
// setting Debug means omitting any automatic behavior (for now it means job processing won't auto-start) | ||
type treeParams struct { | ||
SectionSize int | ||
Branches int | ||
ChunkSize int | ||
Spans []int | ||
Debug bool | ||
hashFunc file.SectionWriterFunc | ||
writerPool sync.Pool | ||
ctx context.Context | ||
} | ||
|
||
func newTreeParams(hashFunc file.SectionWriterFunc) *treeParams { | ||
|
||
h := hashFunc(context.Background()) | ||
p := &treeParams{ | ||
SectionSize: h.SectionSize(), | ||
Branches: h.Branches(), | ||
ChunkSize: h.SectionSize() * h.Branches(), | ||
hashFunc: hashFunc, | ||
} | ||
h.Reset() | ||
p.writerPool.New = func() interface{} { | ||
hf := p.hashFunc(p.ctx) | ||
return hf | ||
} | ||
p.Spans = generateSpanSizes(p.Branches, 9) | ||
return p | ||
} | ||
|
||
func (p *treeParams) SetContext(ctx context.Context) { | ||
p.ctx = ctx | ||
} | ||
|
||
func (p *treeParams) GetContext() context.Context { | ||
return p.ctx | ||
} | ||
|
||
func (p *treeParams) PutWriter(w file.SectionWriter) { | ||
w.Reset() | ||
p.writerPool.Put(w) | ||
} | ||
|
||
func (p *treeParams) GetWriter() file.SectionWriter { | ||
return p.writerPool.Get().(file.SectionWriter) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
package hasher | ||
|
||
import ( | ||
"github.com/ethersphere/swarm/file" | ||
) | ||
|
||
// ReferenceHasher is the source-of-truth implementation of the swarm file hashing algorithm | ||
type ReferenceHasher struct { | ||
params *treeParams | ||
cursors []int // section write position, indexed per level | ||
length int // number of bytes written to the data level of the hasher | ||
buffer []byte // keeps data and hashes, indexed by cursors | ||
counts []int // number of sums performed, indexed per level | ||
hasher file.SectionWriter // underlying hasher | ||
} | ||
|
||
// NewReferenceHasher constructs and returns a new ReferenceHasher | ||
// This implementation is limited to a tree of 9 levels, where level 0 is the data level | ||
// With 32 section size and 128 branches (i.e. unencrypted, non erasure-coded content) this means | ||
// a capacity of 4096 bytes * (128^(9-1)) ~ 295.148 * (10^18) bytes | ||
func NewReferenceHasher(params *treeParams) *ReferenceHasher { | ||
// TODO: remove when bmt interface is amended | ||
h := params.GetWriter() | ||
return &ReferenceHasher{ | ||
params: params, | ||
cursors: make([]int, 9), | ||
counts: make([]int, 9), | ||
buffer: make([]byte, params.ChunkSize*9), | ||
hasher: h, | ||
} | ||
} | ||
|
||
// Hash computes and returns the root hash of arbitrary data | ||
func (r *ReferenceHasher) Hash(data []byte) []byte { | ||
l := r.params.ChunkSize | ||
for i := 0; i < len(data); i += r.params.ChunkSize { | ||
if len(data)-i < r.params.ChunkSize { | ||
l = len(data) - i | ||
} | ||
r.update(0, data[i:i+l]) | ||
} | ||
|
||
// if we didn't end on a chunk boundary we need to hash remaining chunks first | ||
r.hashUnfinished() | ||
|
||
// if the already hashed parts tree is balanced | ||
r.moveDanglingChunk() | ||
|
||
return r.digest() | ||
} | ||
|
||
// write to the data buffer on the specified level | ||
// calls sum if chunk boundary is reached and recursively calls this function for the next level with the acquired bmt hash | ||
// adjusts cursors accordingly | ||
func (r *ReferenceHasher) update(lvl int, data []byte) { | ||
if lvl == 0 { | ||
r.length += len(data) | ||
} | ||
copy(r.buffer[r.cursors[lvl]:r.cursors[lvl]+len(data)], data) | ||
r.cursors[lvl] += len(data) | ||
if r.cursors[lvl]-r.cursors[lvl+1] == r.params.ChunkSize { | ||
ref := r.sum(lvl) | ||
r.update(lvl+1, ref) | ||
r.cursors[lvl] = r.cursors[lvl+1] | ||
} | ||
} | ||
|
||
// calculates and returns the bmt sum of the last written data on the level | ||
func (r *ReferenceHasher) sum(lvl int) []byte { | ||
r.counts[lvl]++ | ||
spanSize := r.params.Spans[lvl] * r.params.ChunkSize | ||
span := (r.length-1)%spanSize + 1 | ||
|
||
sizeToSum := r.cursors[lvl] - r.cursors[lvl+1] | ||
|
||
r.hasher.Reset() | ||
r.hasher.SetSpan(span) | ||
r.hasher.Write(r.buffer[r.cursors[lvl+1] : r.cursors[lvl+1]+sizeToSum]) | ||
ref := r.hasher.Sum(nil) | ||
return ref | ||
} | ||
|
||
// called after all data has been written | ||
// sums the final chunks of each level | ||
// skips intermediate levels that end on span boundary | ||
func (r *ReferenceHasher) digest() []byte { | ||
|
||
// the first section of the buffer will hold the root hash | ||
return r.buffer[:r.params.SectionSize] | ||
} | ||
|
||
// hashes the remaining unhashed chunks at the end of each level | ||
func (r *ReferenceHasher) hashUnfinished() { | ||
if r.length%r.params.ChunkSize != 0 { | ||
ref := r.sum(0) | ||
copy(r.buffer[r.cursors[1]:], ref) | ||
r.cursors[1] += len(ref) | ||
r.cursors[0] = r.cursors[1] | ||
} | ||
} | ||
|
||
// in case of a balanced tree this method concatenates the reference to the single reference | ||
// at the highest level of the tree. | ||
// | ||
// Let F be full chunks (disregarding branching factor) and S be single references | ||
// in the following scenario: | ||
// | ||
// S | ||
// F F | ||
// F F F | ||
// F F F F S | ||
// | ||
// The result will be: | ||
// | ||
// SS | ||
// F F | ||
// F F F | ||
// F F F F | ||
// | ||
// After which the SS will be hashed to obtain the final root hash | ||
func (r *ReferenceHasher) moveDanglingChunk() { | ||
|
||
// calculate the total number of levels needed to represent the data (including the data level) | ||
targetLevel := getLevelsFromLength(r.length, r.params.SectionSize, r.params.Branches) | ||
|
||
// sum every intermediate level and write to the level above it | ||
for i := 1; i < targetLevel; i++ { | ||
|
||
// and if there is a single reference outside a balanced tree on this level | ||
// don't hash it again but pass it on to the next level | ||
if r.counts[i] > 0 { | ||
// TODO: simplify if possible | ||
if r.counts[i-1]-r.params.Spans[targetLevel-1-i] <= 1 { | ||
r.cursors[i+1] = r.cursors[i] | ||
r.cursors[i] = r.cursors[i-1] | ||
continue | ||
} | ||
} | ||
|
||
ref := r.sum(i) | ||
copy(r.buffer[r.cursors[i+1]:], ref) | ||
r.cursors[i+1] += len(ref) | ||
r.cursors[i] = r.cursors[i+1] | ||
} | ||
} |
Oops, something went wrong.