NebulousLabs · starius · Jul 25, 2017 · Jul 26, 2017 · Jul 27, 2017 · Jul 28, 2017
diff --git a/README.md b/README.md
@@ -1,5 +1,4 @@
-merkletree
-----------
+# merkletree
 
 merkletree is a Go package for working with [Merkle
 trees](http://en.wikipedia.org/wiki/Merkle_tree). Specifically, this package is
@@ -12,15 +11,13 @@ piece is part of the full file.
 When sha256 is used as the hashing algorithm, the implementation matches the
 merkle tree described in RFC 6962, 'Certificate Transparency'.
 
-Usage
------
+## Usage
 
 ```go
 package main
 
 import (
     "crypto/sha256"
-    "log"
     "os"
 
     "github.com/NebulousLabs/merkletree"
@@ -38,7 +35,7 @@ func main() {
 	file.Seek(0, 0) // Offset needs to be set back to 0.
 	proofIndex := uint64(7)
 	merkleRoot, proof, numLeaves, _ := merkletree.BuildReaderProof(file, sha256.New(), segmentSize, proofIndex)
-	verified := VerifyProof(sha256.New(), merkleRoot, proof, proofIndex, numLeaves)
+	verified := merkletree.VerifyProof(sha256.New(), merkleRoot, proof, proofIndex, numLeaves)
 
 	// Example 3: Using a Tree to build a merkle tree and get a proof for a
 	// specific index for non-file objects.
@@ -48,11 +45,11 @@ func main() {
 	tree.Push([]byte("another object"))
 	// The merkle root could be obtained by calling tree.Root(), but will also
 	// be provided by tree.Prove()
-	merkleRoot, proof, proofIndex, numLeaves := tree.Prove()
+	merkleRoot, proof, proofIndex, numLeaves = tree.Prove()
 
-	////////////////////////////////////////////////
-	/// Remaining examples deal with cached trees //
-	////////////////////////////////////////////////
+	////////////////////////////////////////////////////
+	/// Next group of examples deal with cached trees //
+	////////////////////////////////////////////////////
 
 	// Example 4: Creating a cached set of Merkle roots and then using them in
 	// a cached tree. The cached tree is height 1, meaning that all elements of
@@ -96,14 +93,115 @@ func main() {
 	// Now we can create the full proof for the cached tree, without having to
 	// rehash any of the elements from subtree1.
 	_, fullProof, _, _ := cachedTree.Prove(subtreeProof)
+
+	////////////////////////////////////////////////////////
+	/// Next group of examples deal with proofs of slices //
+	////////////////////////////////////////////////////////
+
+	// Example 7: Using a Tree to build a merkle tree and get a proof for a
+	// specific slice for non-file objects.
+	tree = merkletree.New(sha256.New())
+	tree.SetSlice(1, 3) // Objects 1 and 2.
+	tree.Push([]byte("an object - the tree will hash the data after it is pushed"))
+	tree.Push([]byte("the first part of the slice"))
+	tree.Push([]byte("the second part of the slice"))
+	tree.Push([]byte("another object"))
+	merkleRoot, proof, _, numLeaves = tree.Prove()
+	verified = merkletree.VerifyProofOfSlice(sha256.New(), merkleRoot, proof, 1, 3, numLeaves)
+
+	// Example 8: Build and verify a proof that the elements at segments 5-10
+	// are in the merkle root. The proof starts with the elements themselves.
+	file.Seek(0, 0) // Offset needs to be set back to 0.
+	proofBegin := uint64(5)
+	proofEnd := uint64(10) + 1
+	merkleRoot, proof, numLeaves, _ = merkletree.BuildReaderProofSlice(file, sha256.New(), segmentSize, proofBegin, proofEnd)
+	verified = merkletree.VerifyProofOfSlice(sha256.New(), merkleRoot, proof, proofBegin, proofEnd, numLeaves)
+
+	// Example 9: Cached tree of height 2, with proof slice entirely inside
+	// one cached subtree.
+	cachedTree = merkletree.NewCachedTree(sha256.New(), 2)
+	cachedTree.SetSlice(5, 7)
+	subtree1 = merkletree.New(sha256.New())
+	subtree1.Push([]byte("first leaf, first subtree"))
+	subtree1.Push([]byte("second leaf, first subtree"))
+	subtree1.Push([]byte("third leaf, first subtree"))
+	subtree1.Push([]byte("fourth leaf, first subtree"))
+	subtree2 = merkletree.New(sha256.New())
+	subtree2.SetSlice(1, 3)
+	subtree2.Push([]byte("first leaf, second subtree"))
+	subtree2.Push([]byte("second leaf, second subtree")) // in proof slice
+	subtree2.Push([]byte("third leaf, second subtree")) // in proof slice
+	subtree2.Push([]byte("fourth leaf, second subtree"))
+	cachedTree.Push(subtree1.Root())
+	cachedTree.Push(subtree2.Root())
+	_, subtreeProof, _, _ = subtree2.Prove()
+	// Now we can create the full proof for the cached tree, without having to
+	// rehash any of the elements from subtree1.
+	merkleRoot, fullProof, _, numLeaves = cachedTree.Prove(subtreeProof)
+	verified = merkletree.VerifyProofOfSlice(sha256.New(), merkleRoot, fullProof, 5, 7, numLeaves)
+
+	// Example 10: Cached tree of height 1, with proof slice consisting
+	// of several full subtrees.
+	cachedTree = merkletree.NewCachedTree(sha256.New(), 1)
+	cachedTree.SetSlice(2, 6)
+	subtree1 = merkletree.New(sha256.New())
+	subtree1.Push([]byte("first leaf, first subtree"))
+	subtree1.Push([]byte("second leaf, first subtree"))
+	subtree2 = merkletree.New(sha256.New())
+	subtree2.SetSlice(0, 2)
+	subtree2.Push([]byte("first leaf, second subtree")) // in proof slice
+	subtree2.Push([]byte("second leaf, second subtree")) // in proof slice
+	subtree3 := merkletree.New(sha256.New())
+	subtree3.SetSlice(0, 2)
+	subtree3.Push([]byte("first leaf, third subtree")) // in proof slice
+	subtree3.Push([]byte("second leaf, third subtree")) // in proof slice
+	subtree4 := merkletree.New(sha256.New())
+	subtree4.Push([]byte("first leaf, fourth subtree"))
+	subtree4.Push([]byte("second leaf, fourth subtree"))
+	cachedTree.Push(subtree1.Root())
+	cachedTree.Push(subtree2.Root())
+	cachedTree.Push(subtree3.Root())
+	cachedTree.Push(subtree4.Root())
+	_, subtreeProof1, _, _ := subtree2.Prove()
+	_, subtreeProof2, _, _ := subtree3.Prove()
+	subtreeProof = append(subtreeProof1, subtreeProof2...)
+	merkleRoot, fullProof, _, numLeaves = cachedTree.Prove(subtreeProof)
+	verified = merkletree.VerifyProofOfSlice(sha256.New(), merkleRoot, fullProof, 2, 6, numLeaves)
+
+	// Example 11: Cached tree of height 1, with proof slice consisting
+	// of cached element hashes.
+	cachedTree = merkletree.NewCachedTree(sha256.New(), 1)
+	cachedTree.SetSlice(2, 6)
+	subtree1 = merkletree.New(sha256.New())
+	subtree1.Push([]byte("first leaf, first subtree"))
+	subtree1.Push([]byte("second leaf, first subtree"))
+	subtree2 = merkletree.New(sha256.New())
+	subtree2.Push([]byte("first leaf, second subtree")) // in proof slice
+	subtree2.Push([]byte("second leaf, second subtree")) // in proof slice
+	subtree3 = merkletree.New(sha256.New())
+	subtree3.Push([]byte("first leaf, third subtree")) // in proof slice
+	subtree3.Push([]byte("second leaf, third subtree")) // in proof slice
+	subtree4 = merkletree.New(sha256.New())
+	subtree4.Push([]byte("first leaf, fourth subtree"))
+	subtree4.Push([]byte("second leaf, fourth subtree"))
+	cachedTree.Push(subtree1.Root())
+	cachedTree.Push(subtree2.Root())
+	cachedTree.Push(subtree3.Root())
+	cachedTree.Push(subtree4.Root())
+	merkleRoot, fullProof, _, numLeaves = cachedTree.ProveCached()
+	verified = merkletree.VerifyProofOfCachedElements(sha256.New(), merkleRoot, fullProof, 1, 3, numLeaves)
+
+	_ = verified
+	_ = collectiveRoot
+	_ = revisedRoot
+	_ = fullProof
 }
 ```
 
 For more extensive documentation, refer to the
 [godoc](http://godoc.org/github.com/NebulousLabs/merkletree).
 
-Notes
------
+## Notes
 
 This implementation does not retain the entire Merkle tree in memory. Rather,
 as each new leaf is added to the tree, is it pushed onto a stack as a "subtree
@@ -127,3 +225,84 @@ hashed multiple times.
 
 When using the Reader functions (ReaderRoot and BuildReaderProof), the last
 segment will not be padded if there are not 'segmentSize' bytes remaining.
+
+## Format of proof
+
+### What is included in the proof
+
+A proof is a slice of slices of bytes. It begins with the leaf data,
+then hashes of subtrees follow. Combining all leaves which are covered in
+these two groups (as leaves from the beginning of the proof or as leaves
+from the subtrees whose hashes constitute the second part of the proof)
+we get all leaves of the tree, and each leaf appears exactly once.
+
+Example. Proof built in a tree of 5 leaves for element at index 2:
+
+```
+     ┌───┴──*
+  *──┴──┐   │
+┌─┴─┐ ┌─┴─* │
+0   1 2   3 4
+      *
+```
+
+Parts of the proof are marked with asterisks (*).
+
+If we build a proof for a slice, the rule is the same: first include all
+leaves from the target slice, then add hashes of all subtrees so that
+together with the target slice they cover all leaves, once.
+
+Example. Proof built in a tree of 7 leaves for the slice [2, 5).
+
+```
+     ┌─────┴─────┐
+  *──┴──┐     ┌──┴──*
+┌─┴─┐ ┌─┴─┐ ┌─┴─*   │
+0   1 2   3 4   5   6
+      *   * *
+```
+
+Example. Proof built in a tree of 7 leaves for the slice [3, 5).
+
+```
+     ┌─────┴─────┐
+  *──┴──┐     ┌──┴──*
+┌─┴─┐ *─┴─┐ ┌─┴─*   │
+0   1 2   3 4   5   6
+          * *
+```
+
+### The order of items in the proof
+
+The proof starts with the data items. For a proof of one element
+it is the element itself (one item in the main proof slice).
+In the case of a slice, the data is represented as multiple items in the
+main proof slice, in the order of occurrence in the source data.
+
+Hashes of subtrees (constituting the second half of a proof) are sorted
+by height (ascending), then by occurrence in the source data. The height
+of an orphan subtree is equal to the height of its parent minus one.
+
+Some examples of how parts of proofs are ordered. A number corresponds
+to the place of this leaf or subtree hash in the proof.
+
+```
+     ┌────┴───┐
+  5──┴──┐     │
+┌─┴─┐ 3─┴─┐ ┌─┴─4
+          1 2
+```
+
+```
+     ┌────┴───5
+  ┌──┴──┐     │
+4─┴─┐ ┌─┴─┐ ┌─┴─┐
+    1 2   3
+```
+
+```
+     ┌────┴───┐
+  5──┴──┐     │
+┌─┴─┐ ┌─┴─┐ ┌─┴─┐
+      1   2 3   4
+```
diff --git a/cachedtree.go b/cachedtree.go
@@ -10,8 +10,9 @@ import (
 // meaning every element added to the CachedTree is the root of a full Merkle
 // tree containing 2^height leaves.
 type CachedTree struct {
-	cachedNodeHeight uint64
-	trueProofIndex   uint64
+	cachedNodeHeight             uint64 // each cached element stands for 2^cachedNodeHeight leaves
+	trueProofBegin, trueProofEnd uint64 // proof slice in leaf indices, as passed to SetSlice
+	cachedBegin, cachedEnd       uint64 // the same slice in cached-element indices, computed by SetSlice
 	Tree
 }
 
@@ -31,41 +32,89 @@ func NewCachedTree(h hash.Hash, cachedNodeHeight uint64) *CachedTree {
 
 // Prove will create a proof that the leaf at the indicated index is a part of
 // the data represented by the Merkle root of the Cached Tree. The CachedTree
-// needs the proof set proving that the index is an element of the cached
-// element in order to create a correct proof. After proof is called, the
-// CachedTree is unchanged, and can receive more elements.
+// needs the proof set proving that the index or slice belongs to the cached
+// element in order to create a correct proof. If SetSlice was called on a slice
+// covering multiple cached elements (in which case every affected cached
+// element must be covered entirely), cachedProofSet is the concatenation of the
+// proofs of those cached elements. After Prove is called, the CachedTree is
+// unchanged, and can receive more elements.
+// Use VerifyProof or VerifyProofOfSlice to verify proofSet returned by this method.
 func (ct *CachedTree) Prove(cachedProofSet [][]byte) (merkleRoot []byte, proofSet [][]byte, proofIndex uint64, numLeaves uint64) {
 	// Determine the proof index within the full tree, and the number of leaves
 	// within the full tree.
 	leavesPerCachedNode := uint64(1) << ct.cachedNodeHeight
 	numLeaves = leavesPerCachedNode * ct.currentIndex
 
+	cut := ct.cachedEnd - ct.cachedBegin // number of cached elements covered by the proof slice
+
 	// Get the proof set tail, which is generated based entirely on cached
 	// nodes.
 	merkleRoot, proofSetTail, _, _ := ct.Tree.Prove()
-	if len(proofSetTail) < 1 {
+	if len(proofSetTail) < int(cut) {
 		// The proof was invalid, return 'nil' for the proof set but accurate
 		// values for everything else.
-		return merkleRoot, nil, ct.trueProofIndex, numLeaves
+		return merkleRoot, nil, ct.trueProofBegin, numLeaves
 	}
 
 	// The full proof set is going to be the input cachedProofSet combined with
 	// the tail proof set. The one caveat is that the tail proof set has an
 	// extra piece of data at the first element - the verifier will assume that
 	// this data exists and therefore it needs to be omitted from the proof
 	// set.
-	proofSet = append(cachedProofSet, proofSetTail[1:]...)
-	return merkleRoot, proofSet, ct.trueProofIndex, numLeaves
+	proofSet = append(cachedProofSet, proofSetTail[cut:]...) // drop the first cut entries: one per covered cached element, replaced by cachedProofSet
+	return merkleRoot, proofSet, ct.trueProofBegin, numLeaves // the reported proofIndex is the first leaf of the proof slice
+}
+
+// ProveCached will create a proof of the cached elements themselves (the values
+// pushed into the CachedTree) rather than of the leaves beneath them. SetSlice must
+// have been called on a slice of leaves covering entire cached elements; otherwise proofSet is nil.
+// Use VerifyProofOfCachedElements to verify proofSet returned by this method.
+func (ct *CachedTree) ProveCached() (merkleRoot []byte, proofSet [][]byte, proofIndex uint64, numLeaves uint64) {
+	// Determine the number of leaves within the full tree: every cached element
+	// stands for 2^cachedNodeHeight leaves.
+	leavesPerCachedNode := uint64(1) << ct.cachedNodeHeight
+	numLeaves = leavesPerCachedNode * ct.currentIndex
+
+	// Get the proof set, which is generated based entirely on cached nodes.
+	merkleRoot, proofSet, _, _ = ct.Tree.Prove()
+	if len(proofSet) < 1 {
+		// The proof was invalid, return 'nil' for the proof set but accurate
+		// values for everything else.
+		return merkleRoot, nil, ct.trueProofBegin, numLeaves
+	}
+	if (ct.trueProofEnd-ct.trueProofBegin)%(1<<ct.cachedNodeHeight) != 0 {
+		// SetIndex was called, or SetSlice covered only part of one cached element.
+		return merkleRoot, nil, ct.trueProofBegin, numLeaves
+	}
+	return merkleRoot, proofSet, ct.trueProofBegin, numLeaves
 }
 
 // SetIndex will inform the CachedTree of the index of the leaf for which a
 // storage proof is being created. The index should be the index of the actual
 // leaf, and not the index of the cached element containing the leaf. SetIndex
-// must be called on empty CachedTree.
+// or SetSlice must be called on an empty CachedTree.
 func (ct *CachedTree) SetIndex(i uint64) error {
+	return ct.SetSlice(i, i+1) // a single index is the one-leaf slice [i, i+1)
+}
+
+// SetSlice will inform the CachedTree of the slice of leaves [proofBegin,
+// proofEnd) for which a storage proof is being created. Indices are those of the
+// actual leaves, not of the cached elements containing them. SetIndex or SetSlice
+// must be called on an empty CachedTree. A slice covering multiple cached elements
+// must cover each of them entirely, otherwise an error is returned.
+// NOTE(review): proofEnd == 0 makes proofEnd-1 wrap around — confirm callers never pass it.
+func (ct *CachedTree) SetSlice(proofBegin, proofEnd uint64) error {
 	if ct.head != nil {
-		return errors.New("cannot call SetIndex on Tree if Tree has not been reset")
+		return errors.New("cannot call SetIndex or SetSlice on Tree if Tree has not been reset")
+	}
+	ct.trueProofBegin = proofBegin
+	ct.trueProofEnd = proofEnd
+	ct.cachedBegin = proofBegin / (1 << ct.cachedNodeHeight) // cached element holding the first leaf
+	ct.cachedEnd = (proofEnd-1)/(1<<ct.cachedNodeHeight) + 1 // one past the cached element holding the last leaf
+	if ct.cachedEnd != ct.cachedBegin+1 { // the slice is not confined to a single cached element
+		if proofBegin%(1<<ct.cachedNodeHeight) != 0 || proofEnd%(1<<ct.cachedNodeHeight) != 0 {
+			return errors.New("cannot call SetSlice affecting multiple cached elements and not covering entire cached elements") // NOTE(review): the four fields above are already updated at this point
+		}
 	}
-	ct.trueProofIndex = i
-	return ct.Tree.SetIndex(i / (1 << ct.cachedNodeHeight))
+	return ct.Tree.SetSlice(ct.cachedBegin, ct.cachedEnd) // the embedded Tree is indexed by cached element
 }