diff --git a/Cargo.lock b/Cargo.lock index e8962b0..9110186 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1158,7 +1158,7 @@ version = "0.15.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" dependencies = [ - "encode_unicode", + "encode_unicode 0.3.6", "lazy_static", "libc", "unicode-width", @@ -1303,6 +1303,27 @@ dependencies = [ "typenum", ] +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + [[package]] name = "ctr" version = "0.9.2" @@ -1545,6 +1566,16 @@ dependencies = [ "dirs-sys", ] +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + [[package]] name = "dirs-sys" version = "0.4.1" @@ -1557,6 +1588,17 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + [[package]] name = "downcast-rs" version = "1.2.1" @@ -1651,6 +1693,12 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "encoding_rs" version = "0.8.34" @@ -2496,6 +2544,12 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + [[package]] name = "hex" version = "0.4.3" @@ -2834,6 +2888,17 @@ version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" +[[package]] +name = "is-terminal" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" +dependencies = [ + "hermit-abi 0.4.0", + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -3179,7 +3244,7 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.9", "libc", "wasi", "windows-sys 0.52.0", @@ -3349,7 +3414,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.9", "libc", ] @@ -4048,6 +4113,20 @@ dependencies = [ "syn 2.0.82", ] +[[package]] +name = "prettytable-rs" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eea25e07510aa6ab6547308ebe3c036016d162b8da920dbb079e3ba8acf3d95a" +dependencies = [ + "csv", + "encode_unicode 1.0.0", + "is-terminal", + 
"lazy_static", + "term", + "unicode-width", +] + [[package]] name = "primitive-types" version = "0.12.2" @@ -5996,6 +6075,17 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + [[package]] name = "thiserror" version = "1.0.65" @@ -6989,6 +7079,7 @@ dependencies = [ "alloy-sol-types", "clap", "hex", + "prettytable-rs", "serde", "serde_json", "sp1-helper", @@ -7015,7 +7106,6 @@ dependencies = [ "rs_merkle", "serde", "serde_json", - "sp1-sdk", "sp1-zkvm", ] diff --git a/Cargo.toml b/Cargo.toml index 361b4e3..3faac11 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ resolver = "2" [workspace.dependencies] alloy-sol-types = "0.7.7" rs_merkle = "1.4.2" +prettytable-rs = "0.10" [patch.crates-io] arrow-buffer = { git = "https://github.com/yourbuddyconner/arrow-rs", branch = "master", package = "arrow-buffer" } \ No newline at end of file diff --git a/README.md b/README.md index 41ca77a..91ee1a8 100644 --- a/README.md +++ b/README.md @@ -1,263 +1,59 @@ # zkDB: Zero-Knowledge Database 🧠 -## Reflection on the Future of Blockchain Development +zkDB is a powerful, privacy-preserving database system that implements the Verifiable Database (VDB) Framework. It combines Merkle trees with zero-knowledge proofs using the SP1 zkVM. Designed for developers building trustless applications, zkDB offers verifiable data operations without compromising privacy. -Before diving into the technical details of zkDB, it's crucial to consider the broader context of blockchain development and its future direction. There's a growing recognition in the blockchain community that current blockchain virtual machines (EVMs, SVMs, Move, etc.) 
may not be sufficient to achieve feature parity with Web2 applications or to fully realize the potential of trustless applications. +## Key Features -This limitation points to an increasing need for off-chain services and computation. As blockchain platforms evolve, we're likely to see a shift towards: +- **Zero-Knowledge Proofs**: Prove data operations without revealing content +- **Complex Queries**: Go beyond simple inclusion proofs +- **Full Verifiability**: Every action (insert, query, prove) generates a proof +- **Stateless Design**: Entire database state can be serialized +- **zkVM Integration**: Allows for advanced computations while maintaining verifiability -1. A greater emphasis on off-chain services to extend blockchain capabilities. -2. Simpler tools and frameworks for building these off-chain services. -3. Blockchain networks optimizing for throughput and scale, while relying on off-chain services for complex computations. -4. Increased demand for developers skilled in building hybrid systems that leverage both on-chain and off-chain components. +## Why zkDB? -zkDB is positioned at the forefront of this trend. By combining the security of Merkle trees, the efficiency of off-chain computation, and the verifiability of zero-knowledge proofs, zkDB demonstrates how we can build powerful, scalable systems that extend beyond the limitations of traditional blockchain VMs. +zkDB addresses the growing need for off-chain services in blockchain development, offering a solution that extends beyond the limitations of traditional blockchain VMs. It's positioned at the forefront of the trend towards hybrid systems that leverage both on-chain and off-chain components. ---- +[Learn more about why zkDB matters](docs/why.md) -## What is zkDB? +## Understanding zkDB -zkDB is a Merkle tree-based database utilizing the [SP1](https://succinct.xyz) zkVM for secure and efficient operations. 
It supports ```insert```, ```query```, and ```prove``` commands, providing a foundation for verifiable data storage and retrieval. +- [What is the VDB Framework?](docs/what-is-vdb-framework.md) +- [How zkDB implements the VDB Framework](docs/why.md#how-zkdb-implements-the-vdb-framework) +- [zkDB Data Flow](docs/why.md#zkdb-data-flow) -## How It Works +## Quick Example -zkDB combines the power of Merkle trees with zero-knowledge proofs to create a verifiable database system. Here's a high-level overview of its operation: +Here's a simple example of how to use zkDB: -1. **Data Storage**: When data is inserted, it's stored as leaves in a Merkle tree. -2. **Merkle Tree**: The Merkle tree provides an efficient way to cryptographically verify the integrity of large datasets. -3. **Zero-Knowledge Proofs**: Using SP1 zkVM, the system can generate proofs of data inclusion without revealing the actual data. -4. **Verifiable Queries**: Users can query data and receive both the result and a proof of its correctness. +```bash +# Insert a key-value pair +cargo run --release --bin merkle -- insert user123 "John Doe" -## How zkDB Enhances Standard Merkle Trees +# Query a value +cargo run --release --bin merkle -- query user123 -zkDB builds on Merkle trees, adding crucial features: - -1. **Zero-Knowledge Proofs**: Prove data operations without revealing content. -2. **Complex Queries**: Go beyond simple inclusion proofs. -3. **Full Verifiability**: Every action (insert, query, prove) generates a proof. -4. **Stateless Design**: Entire database state can be serialized, enabling unique distributed use cases. -5. **zkVM Integration**: Allows for advanced computations while maintaining verifiability. - -These additions transform zkDB from a simple Merkle tree into a powerful, privacy-preserving database system with broad applications in trustless environments. 
- -```mermaid -graph TD - A[Standard Merkle Tree] -->|Add ZK Proofs| B[zkDB] - B -->|Enable| C[Data Privacy] - B -->|Support| D[Complex Queries] - B -->|Provide| E[Full Verifiability] - B -->|Allow| F[Stateless Operations] - B -->|Integrate| G[Advanced Computations] +# Generate a proof +cargo run --release --bin merkle -- prove user123 ``` -## How zkDB Differs from Standard Merkle Trees - -While zkDB builds upon the foundation of Merkle trees, it extends their capabilities in several key ways: - -1. **Zero-Knowledge Proofs**: Unlike standard Merkle trees, zkDB integrates with the SP1 zkVM to generate zero-knowledge proofs. This allows for verifiable computations without revealing the underlying data. - -2. **Stateless Execution**: zkDB is designed for stateless execution within the zkVM environment. The entire state is serialized and deserialized between operations, allowing for verifiable state transitions. - -3. **Complex Operations**: While Merkle trees typically offer simple inclusion proofs, zkDB can support more complex operations and queries, all of which can be proven in zero-knowledge. - -4. **Verifiable Queries**: In addition to proving inclusion, zkDB can prove the correctness of query results without revealing the actual data. - -5. **Integration with zkVM**: The use of SP1 zkVM allows for arbitrary computations on the data, not just simple lookups. This enables more sophisticated database operations while maintaining verifiability. - -6. **Proof Generation for All Operations**: Every operation (insert, query, prove) can generate a proof, not just inclusion proofs as in standard Merkle trees. - -7. **Serializable State**: The entire database state, including the Merkle tree structure, is serialized and can be passed around or stored externally, enabling unique use cases in distributed systems. - -These features make zkDB more than just a Merkle tree implementation. 
It's a full-fledged verifiable database system that leverages zero-knowledge proofs to provide enhanced privacy and verifiability for complex operations. - -### Process Flow - -```mermaid -graph TD - A[zkDB CLI] -->|Prepare Input| B(SP1 zkVM) - B -->|Execute Operation| C{Merkle Tree} - C -->|Generate Result| D[Output + Proof] - D -->|Return| A - E[State File] <-->|Read/Write| A -``` - -1. The zkDB CLI prepares the input for the SP1 zkVM. -2. The zkVM executes the requested operation on the Merkle tree. -3. Results and proofs are generated. -4. The output is returned to the CLI. -5. State is maintained between operations using a local file. - -## Key Concepts - -- **Merkle Tree**: A tree in which every leaf node is labelled with the cryptographic hash of a data block, and every non-leaf node is labelled with the cryptographic hash of the labels of its child nodes. -- **Zero-Knowledge Proofs**: Cryptographic methods by which one party can prove to another party that they know a value x, without conveying any information apart from the fact that they know the value x. -- **SP1 zkVM**: A zero-knowledge virtual machine that allows for the creation and verification of proofs for arbitrary computations. - -## Use Cases - -- **Verifiable Data Storage**: Ensure data integrity in distributed systems. -- **Privacy-Preserving Queries**: Prove data existence without revealing the data itself. -- **Audit Trails**: Create verifiable logs of data operations. -- **Decentralized Applications**: Build trustless systems with verifiable state transitions. - -## Prerequisites - -- Rust and Cargo (latest stable version) -- SP1 zkVM toolchain - -## Building the Project +## Documentation -1. Clone the repository and navigate to the project directory: +- [Getting Started Guide](docs/getting-started.md) +- [Usage Guide](docs/usage.md) - ``` - git clone - cd zkdb - ``` +## Benchmarks -2. 
Build the project in release mode: - - ``` - cargo build --release - ``` - - *Note: It's crucial to use the ```--release``` flag when building, as sp1-sdk must be built in release mode.* - -## Using the zkDB Script - -The zkDB script provides a command-line interface to interact with the Merkle tree database. Here's how to use it: - -1. Ensure you're in the project root directory. - -2. Run the script using Cargo in release mode: - - ``` - cargo run --release --bin merkle -- [arguments] - ``` - - Replace `````` with one of the following: ```insert```, ```query```, or ```prove```. - - *Remember to always use the ```--release``` flag when running the script.* - -### Commands - -#### Insert - -To insert a key-value pair: - -``` -cargo run --release --bin merkle -- insert -``` - -**Example:** - -``` -cargo run --release --bin merkle -- insert mykey myvalue -``` - -#### Query - -To query a value by key: - -``` -cargo run --release --bin merkle -- query -``` - -**Example:** - -``` -cargo run --release --bin merkle -- query mykey -``` - -#### Prove - -To generate a proof for a key: - -``` -cargo run --release --bin merkle -- prove -``` - -**Example:** - -``` -cargo run --release --bin merkle -- prove mykey -``` - -### Generating SP1 Proofs - -To generate and verify an SP1 proof along with any command, add the ```--prove``` flag: - -``` -cargo run --release --bin merkle -- [arguments] --prove -``` - -**Examples:** - -``` -cargo run --release --bin merkle -- insert mykey myvalue --prove -cargo run --release --bin merkle -- query mykey --prove -cargo run --release --bin merkle -- prove mykey --prove -``` - -These commands will execute the respective operations and also generate and verify an SP1 proof. - -### State Management - -The script automatically manages the state of the Merkle tree. The state is passed between operations as a base64-encoded JSON string. This allows for stateless execution of the zkVM program while maintaining continuity between commands. 
+Check out our [usage guide](docs/usage.md#benchmark-results) for the latest performance benchmarks. ## Project Structure -- ```src/main.rs```: Contains the main zkVM program logic for Merkle tree operations. -- ```src/bin/merkle.rs```: Implements the command-line interface for interacting with the zkVM program. -- ```tests/integration_tests.rs```: Contains integration tests for the zkVM program. -- ```script/src/bin/db.rs```: [Describe purpose or remove if unused] -- ```script/merkle_state.txt```: Stores the current state of the Merkle tree. -- ```.env.example```: Example environment variables configuration. -- ```README.md```: Project documentation. - -## Running Tests - -To run the integration tests: - -``` -cargo test -``` - -This will execute the tests defined in ```tests/integration_tests.rs```. - -## Implementation Details - -- The project uses the ```rs_merkle``` crate for Merkle tree operations. -- The ```sp1-zkvm``` crate is used for zkVM-specific functionality. -- State is serialized and deserialized using ```serde_json``` and ```base64``` encoding. - -## Benchmark Results - -Here are the benchmark results for the main operations: - -``` -+-----------+--------+--------------+-------------+ -| Operation | Cycles | Total Time | Avg Time | -+-----------+--------+--------------+-------------+ -| insert | 44453 | 1.076435122s | 10.764351ms | -+-----------+--------+--------------+-------------+ -| query | 19995 | 1.038018872s | 10.380188ms | -+-----------+--------+--------------+-------------+ -| prove | 20024 | 1.059557666s | 10.595576ms | -+-----------+--------+--------------+-------------+ -``` - -These results show the number of cycles, total time, and average time for each operation over 100 iterations. - -## Future Enhancements - -- Implement batch operations for improved efficiency. -- Add support for more complex query types. -- Integrate with distributed storage systems. -- Develop a high-level API for easier integration with other applications. 
- -## Note +- `src/main.rs`: Contains the main zkVM program logic for Merkle tree operations. +- `src/bin/merkle.rs`: Implements the command-line interface for interacting with the zkVM program. +- `tests/integration_tests.rs`: Contains integration tests for the zkVM program. +- `script/merkle_state.txt`: Stores the current state of the Merkle tree. -This project is a demonstration of using SP1 zkVM for Merkle tree operations. It's not intended for production use without further security audits and optimizations. +For more details, see our [Getting Started Guide](docs/getting-started.md#project-structure). ## Contributing diff --git a/crates/script/Cargo.toml b/crates/script/Cargo.toml index 16a4582..7c495de 100644 --- a/crates/script/Cargo.toml +++ b/crates/script/Cargo.toml @@ -21,6 +21,10 @@ clap = { version = "4.0", features = ["derive", "env"] } tracing = "0.1.40" hex = "0.4.3" alloy-sol-types = { workspace = true } +prettytable-rs = "0.10" + +[dev-dependencies] +sp1-sdk = "2.0.0" [build-dependencies] sp1-helper = "2.0.0" diff --git a/crates/script/merkle_state.txt b/crates/script/merkle_state.txt new file mode 100644 index 0000000..190fa53 --- /dev/null +++ b/crates/script/merkle_state.txt @@ -0,0 +1 @@ +eyJsZWF2ZXMiOltbMjUyLDIyMiw0Myw0NiwyMTksMTY1LDEwNywyNDQsOCw5NiwzMSwxODMsMzMsMjU0LDE1NSw5Miw1MSwxNDEsMTYsMjM4LDY2LDE1OCwxNjAsNzksMTc0LDg1LDE3LDE4MiwxNDMsMTkxLDE0MywxODVdLFsxMDYsMjE2LDE1MywzOSw5OSwxMzgsNzksMzcsMTQ4LDE3NCwzNywxMTQsMjA4LDE2OSwxMDYsNzMsNjgsNTgsOTQsNDksMTAsMTI0LDE3OSwxMDUsMTM2LDE1MSwxMjYsMjUyLDIyOCwxMDgsMTU0LDJdLFsxMzgsMTA0LDIzNywxMDEsMjM0LDIyMiwyMDAsMTE0LDI0Niw5NSw2OSwxODcsMjQ3LDI2LDIyLDEyMSwyMDgsMTY3LDE0LDE4OSwyMzIsMTM3LDE2OSwzOCwxNjksODcsMjQ1LDg1LDIyNCwxNzUsNDUsMTc2XV0sImtleV9pbmRpY2VzIjp7ImFub3RoZXJrZXkiOjIsImZvbyI6MCwibXlrZXkiOjF9fQ== \ No newline at end of file diff --git a/crates/script/src/bin/merkle.rs b/crates/script/src/bin/merkle.rs index 45bbe2e..1c0f7ed 100644 --- a/crates/script/src/bin/merkle.rs +++ b/crates/script/src/bin/merkle.rs 
@@ -22,9 +22,20 @@ pub const ZKDB_MERKLE_ELF: &[u8] = include_bytes!("../../../../elf/riscv32im-suc /// Commands for the Merkle client. #[derive(Subcommand, Debug)] enum Command { - Insert { key: String, value: String }, - Query { key: String }, - Prove { key: String }, + Insert { + key: String, + value: String, + #[clap(long, help = "Generate and verify an SP1 proof")] + prove: bool, + }, + Query { + key: String, + #[clap(long, help = "Generate and verify an SP1 proof")] + prove: bool, + }, + Prove { + key: String, + }, } /// The arguments for the command. @@ -33,9 +44,6 @@ enum Command { struct Args { #[clap(subcommand)] command: Command, - - #[clap(long, global = true, help = "Generate and verify an SP1 proof")] - prove: bool, } fn main() { @@ -55,7 +63,7 @@ fn main() { // Prepare the command input. let command_json = match &args.command { - Command::Insert { key, value } => serde_json::json!({ + Command::Insert { key, value, prove } => serde_json::json!({ "command": "insert", "params": { "key": key, @@ -63,7 +71,7 @@ fn main() { }, "state": state, }), - Command::Query { key } => serde_json::json!({ + Command::Query { key, prove } => serde_json::json!({ "command": "query", "params": { "key": key, @@ -115,16 +123,23 @@ fn main() { info!("Number of cycles: {}", report.total_instruction_count()); // Generate and verify proof if requested - if args.prove { - info!("Generating and verifying proof..."); - let (pk, vk) = client.setup(ZKDB_MERKLE_ELF); - let proof = client - .prove(&pk, stdin) - .run() - .expect("Failed to generate proof"); - println!("Proof generated successfully."); - client.verify(&proof, &vk).expect("Failed to verify proof"); - println!("Proof verified successfully."); + match &args.command { + Command::Insert { prove, .. } | Command::Query { prove, .. 
} => { + if *prove { + info!("Generating and verifying proof..."); + let (pk, vk) = client.setup(ZKDB_MERKLE_ELF); + let proof = client + .prove(&pk, stdin.clone()) + .run() + .expect("Failed to generate proof"); + println!("Proof generated successfully."); + client.verify(&proof, &vk).expect("Failed to verify proof"); + println!("Proof verified successfully."); + } + } + Command::Prove { key } => { + // Handle additional logic for the Prove subcommand if necessary + } } } Err(e) => error!("Execution failed: {:?}", e), diff --git a/crates/script/src/bin/merkle_benchmark.rs b/crates/script/src/bin/merkle_benchmark.rs new file mode 100644 index 0000000..fbbc333 --- /dev/null +++ b/crates/script/src/bin/merkle_benchmark.rs @@ -0,0 +1,123 @@ +use clap::Parser; +use prettytable::{row, Table}; +use sp1_sdk::{ProverClient, SP1Stdin}; +use std::time::Instant; +use tracing::log::{error, info}; + +/// The ELF file for the zkdb-merkle program. +pub const ZKDB_MERKLE_ELF: &[u8] = include_bytes!("../../../../elf/riscv32im-succinct-zkvm-elf"); + +#[derive(Parser, Debug)] +struct Args { + #[clap(long, default_value = "100")] + iterations: usize, +} + +struct BenchmarkResult { + operation: String, + cycles: u64, + total_time: std::time::Duration, + avg_time: std::time::Duration, +} + +fn main() { + sp1_sdk::utils::setup_logger(); + let args = Args::parse(); + + let client = ProverClient::new(); + + let insert_result = benchmark_operation(&client, "insert", args.iterations); + let query_result = benchmark_operation(&client, "query", args.iterations); + let prove_result = benchmark_operation(&client, "prove", args.iterations); + + print_results(&[insert_result, query_result, prove_result]); +} + +fn benchmark_operation( + client: &ProverClient, + operation: &str, + iterations: usize, +) -> BenchmarkResult { + let mut total_time = std::time::Duration::new(0, 0); + let cycles; + + // Execute once to get the cycle count + let command = create_command(operation, 0); + let mut stdin = 
SP1Stdin::new(); + stdin.write(&command); + + match client.execute(ZKDB_MERKLE_ELF, stdin).run() { + Ok((_, report)) => { + cycles = report.total_instruction_count(); + } + Err(e) => { + error!("Execution failed: {:?}", e); + cycles = 0; + } + } + + // Run multiple iterations for timing + for i in 0..iterations { + let command = create_command(operation, i); + let mut stdin = SP1Stdin::new(); + stdin.write(&command); + + let start = Instant::now(); + match client.execute(ZKDB_MERKLE_ELF, stdin).run() { + Ok(_) => { + total_time += start.elapsed(); + } + Err(e) => error!("Execution failed: {:?}", e), + } + } + + BenchmarkResult { + operation: operation.to_string(), + cycles, + total_time, + avg_time: total_time.checked_div(iterations as u32).unwrap_or_default(), // `Duration / 0` panics; report a zero average when --iterations is 0 + } +} + +fn create_command(operation: &str, i: usize) -> String { + match operation { + "insert" => format!( + "{{ + \"command\": \"insert\", + \"params\": {{ + \"key\": \"key{}\", + \"value\": \"value{}\" + }}, + \"state\": null + }}", + i, i + ), + "query" | "prove" => format!( + "{{ + \"command\": \"{}\", + \"params\": {{ + \"key\": \"key{}\" + }}, + \"state\": null + }}", + operation, i + ), + _ => panic!("Unknown operation: {}", operation), + } +} + +fn print_results(results: &[BenchmarkResult]) { + let mut table = Table::new(); + table.add_row(row!["Operation", "Cycles", "Total Time", "Avg Time"]); + + for result in results { + table.add_row(row![ + result.operation, + result.cycles, + format!("{:?}", result.total_time), + format!("{:?}", result.avg_time) + ]); + } + + table.printstd(); +} diff --git a/crates/zkdb-merkle/tests/integration_tests.rs b/crates/script/tests/integration_tests.rs similarity index 98% rename from crates/zkdb-merkle/tests/integration_tests.rs rename to crates/script/tests/integration_tests.rs index 7464625..45e6ed1 100644 --- a/crates/zkdb-merkle/tests/integration_tests.rs +++ b/crates/script/tests/integration_tests.rs @@ -51,6 +51,7 @@ fn run_program(client: &ProverClient, input_json: serde_json::Value) -> 
Value { let mut stdin = SP1Stdin::new(); stdin.write(&command_str); + // Update the ELF file path let zkdb_merkle_elf = include_bytes!("../../../elf/riscv32im-succinct-zkvm-elf"); let (output, _) = client diff --git a/crates/zkdb-merkle/Cargo.toml b/crates/zkdb-merkle/Cargo.toml index cb9ae31..92090f7 100644 --- a/crates/zkdb-merkle/Cargo.toml +++ b/crates/zkdb-merkle/Cargo.toml @@ -10,7 +10,4 @@ serde = { version = "1.0", default-features = false, features = ["derive", "allo serde_json = { version = "1.0", default-features = false, features = ["alloc"] } base64 = { version = "0.13", default-features = false, features = ["alloc"] } hex = { version = "0.4", default-features = false, features = ["alloc"] } -bincode = "1.3" - -[dev-dependencies] -sp1-sdk = "2.0.0" +bincode = { version = "1.3", default-features = false } \ No newline at end of file diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 0000000..96d8284 --- /dev/null +++ b/docs/getting-started.md @@ -0,0 +1,77 @@ +# Getting Started with zkDB + +## Prerequisites + +- Rust and Cargo (latest stable version) +- SP1 zkVM toolchain + +## Setting up SP1 + +Before building the zkDB project, you need to set up the SP1 zkVM toolchain. Follow these steps: + +1. Install the required dependencies: + - Git + - Rust (Nightly) + - Docker + +2. Install SP1 using the prebuilt binaries (recommended): + + ``` + curl -L https://sp1.succinct.xyz | bash + ``` + + Follow the on-screen instructions to make the `sp1up` command available in your CLI. + +3. Run `sp1up` to install the toolchain and the `cargo prove` CLI: + + ``` + sp1up + ``` + + This will install: + - The `succinct` Rust toolchain with support for the `riscv32im-succinct-zkvm-elf` compilation target. + - The `cargo prove` CLI tool for compiling SP1 programs and other helper functionality. + +4. 
Verify the installation: + + ``` + cargo prove --version + RUSTUP_TOOLCHAIN=succinct cargo --version + ``` + +For more detailed instructions or troubleshooting, refer to the [official SP1 installation guide](https://docs.succinct.xyz/getting-started/install.html). + +## Building the Project + +1. Clone the repository and navigate to the project directory: + + ``` + git clone <repository-url> + cd zkdb + ``` + +2. Build the project in release mode: + + ``` + cargo build --release + ``` + + *Note: It's crucial to use the `--release` flag when building, as sp1-sdk must be built in release mode.* + +## Project Structure + +- `src/main.rs`: Contains the main zkVM program logic for Merkle tree operations. +- `src/bin/merkle.rs`: Implements the command-line interface for interacting with the zkVM program. +- `tests/integration_tests.rs`: Contains integration tests for the zkVM program. +- `script/merkle_state.txt`: Stores the current state of the Merkle tree. +- `.env.example`: Example environment variables configuration. + +## Running Tests + +To run the integration tests: + +``` +cargo test +``` + +This will execute the tests defined in `tests/integration_tests.rs`. diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 0000000..8e5735f --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,111 @@ +# Using zkDB + +## Command-Line Interface + +The zkDB script provides a command-line interface to interact with the Merkle tree database. + +### Basic Usage + +Run the script using Cargo in release mode: + +``` +cargo run --release --bin merkle -- <command> [arguments] +``` + +Replace `<command>` with one of the following: `insert`, `query`, or `prove`.
+ +*Remember to always use the `--release` flag when running the script.* + +### Commands + +#### Insert + +To insert a key-value pair: + +``` +cargo run --release --bin merkle -- insert <key> <value> +``` + +**Example:** + +``` +cargo run --release --bin merkle -- insert mykey myvalue +``` + +#### Query + +To query a value by key: + +``` +cargo run --release --bin merkle -- query <key> +``` + +**Example:** + +``` +cargo run --release --bin merkle -- query mykey +``` + +#### Prove + +To generate a proof for a key: + +``` +cargo run --release --bin merkle -- prove <key> +``` + +**Example:** + +``` +cargo run --release --bin merkle -- prove mykey +``` + +### Generating SP1 Proofs + +To generate and verify an SP1 proof along with an `insert` or `query` command, add the `--prove` flag: + +``` +cargo run --release --bin merkle -- <command> [arguments] --prove +``` + +**Examples:** + +``` +cargo run --release --bin merkle -- insert mykey myvalue --prove +cargo run --release --bin merkle -- query mykey --prove +# The `prove` subcommand does not accept the `--prove` flag. +``` + +These commands will execute the respective operations and also generate and verify an SP1 proof. + +### State Management + +The script automatically manages the state of the Merkle tree. The state is passed between operations as a base64-encoded JSON string. This allows for stateless execution of the zkVM program while maintaining continuity between commands. + +## Implementation Details + +- The project uses the `rs_merkle` crate for Merkle tree operations. +- The `sp1-zkvm` crate is used for zkVM-specific functionality. +- State is serialized and deserialized using `serde_json` and `base64` encoding.
+ +## Benchmark Results + +Here are the benchmark results for the main operations: + +``` ++-----------+--------+--------------+-------------+ +| Operation | Cycles | Total Time | Avg Time | ++-----------+--------+--------------+-------------+ +| insert | 44453 | 1.076435122s | 10.764351ms | ++-----------+--------+--------------+-------------+ +| query | 19995 | 1.038018872s | 10.380188ms | ++-----------+--------+--------------+-------------+ +| prove | 20024 | 1.059557666s | 10.595576ms | ++-----------+--------+--------------+-------------+ +``` + +These results show the number of cycles, total time, and average time for each operation over 100 iterations. + +## Note + +This project is a demonstration of using SP1 zkVM for Merkle tree operations. It's not intended for production use without further security audits and optimizations. diff --git a/docs/what-is-vdb-framework.md b/docs/what-is-vdb-framework.md new file mode 100644 index 0000000..f446cf1 --- /dev/null +++ b/docs/what-is-vdb-framework.md @@ -0,0 +1,140 @@ +# What is the Verifiable Database (VDB) Framework? + +## Preamble: Understanding Blockchain + +Before diving into the VDB Framework, let's briefly explain what a blockchain is and why it's important: + +A blockchain is a decentralized, digital ledger that records transactions across many computers. Its key features are: + +1. **Decentralization**: No single entity controls the entire network. +2. **Transparency**: All transactions are visible to everyone on the network. +3. **Immutability**: Once recorded, data cannot be altered without consensus. +4. **Security**: Cryptographic techniques ensure data integrity and authenticity. 
+ +Blockchains are used for various purposes, including: +- Cryptocurrencies (e.g., Bitcoin) +- Smart contracts (e.g., Ethereum) +- Supply chain tracking +- Voting systems +- Decentralized finance (DeFi) + +The main goal of blockchain technology is to create trust in a trustless environment, enabling secure transactions and agreements without the need for intermediaries. + +## Introduction to the VDB Framework + +The Verifiable Database (VDB) Framework is a concept that bridges the gap between traditional databases and blockchain technology. It addresses key challenges in the blockchain space and provides solutions for businesses looking to leverage blockchain benefits without completely overhauling their existing systems. + +## Why Do We Need the VDB Framework? + +To understand the need for the VDB Framework, let's consider three key points: + +1. **On-Chain State is Expensive**: + - Storing data directly on a blockchain (on-chain) is costly and becomes more expensive as demand increases. + - This makes it impractical for businesses to store large amounts of data or perform complex computations directly on the blockchain. + +2. **Existing Businesses Have Off-Chain State**: + - Most businesses already have established databases and systems that operate off the blockchain (off-chain). + - Completely migrating these systems to the blockchain would be impractical, expensive, and often unnecessary. + +3. **Crypto Rails Offer Great Distribution, But Are Hard to Capitalize On**: + - Blockchain and cryptocurrency technologies provide excellent tools for distribution and decentralization. + - However, it's challenging for businesses to take advantage of these benefits while maintaining their existing data structures and privacy requirements. + +The VDB Framework aims to solve these issues by providing a middle ground—allowing businesses to keep their data off-chain while still benefiting from blockchain's verifiability and distribution capabilities. 
+ +## What Does the VDB Framework Do? + +The VDB Framework combines traditional database functionalities with cryptographic proof systems. It allows you to: + +1. **Keep Data Off-Chain**: Store your data in traditional, efficient database systems. +2. **Prove Data Operations**: Generate proofs about your data and operations without revealing the data itself. +3. **Verify On-Chain**: Use blockchain to verify these proofs, ensuring data integrity and operation correctness. + +This approach gives you the best of both worlds: the efficiency and privacy of off-chain storage with the verifiability and trust of blockchain systems. + +## Core Components of the VDB Framework + +The VDB Framework consists of three main types of proofs: + +```mermaid +graph TD + A[VDB Framework] --> B[Provenance Proofs] + A --> C[State Transition Proofs] + A --> D[Query Proofs] + B --> E[Prove Data Origin] + C --> F[Verify Database Changes] + D --> G[Validate Query Execution] + E --> H[e.g., Verify Credit Score] + F --> I[e.g., Verify Fund Transfer] + G --> J[e.g., Verify Account Balance] +``` + +1. **Provenance Proofs**: + - Prove where data came from without revealing the data itself. + - *Example*: Prove a user's credit score is above 700 without revealing the exact score. + +2. **State Transition Proofs**: + - Prove that database changes were made correctly. + - *Example*: Prove that funds were transferred between accounts correctly without revealing account details. + +3. **Query Proofs**: + - Prove that a database query was executed correctly and the result is accurate. + - *Example*: Prove that a user's account balance is above a certain threshold without revealing the exact balance. + +These proofs work together to ensure data integrity, privacy, and verifiability in the VDB Framework. + +## How Can Businesses Use Verifiable DBs? + +Here are some practical examples of how businesses might use a verifiable database: + +1. 
**Financial Services**: + - **Regulatory Compliance**: Prove compliance with financial regulations without revealing sensitive customer data. + - **Transaction Integrity**: Verify that transactions are processed correctly without exposing account details. + +2. **Supply Chain Management**: + - **Product Provenance**: Prove the origin and journey of products without revealing proprietary supply chain data. + - **Quality Assurance**: Verify that quality checks were performed correctly without exposing the details of the checks. + +3. **Healthcare**: + - **Patient Privacy**: Prove that a patient meets certain criteria for a study without revealing their medical records. + - **Procedure Verification**: Ensure that proper medical procedures were followed without exposing patient data. + +4. **Voting Systems**: + - **Vote Integrity**: Prove that votes were counted correctly without revealing individual votes. + - **Voter Eligibility**: Verify voter eligibility without exposing personal information. + +By using verifiable databases, businesses can leverage the trust and verifiability of blockchain technology while maintaining the privacy and efficiency of their existing database systems. + +## How Can Software Teams Use the VDB Framework? + +The VDB Framework is primarily designed for software teams creating database technologies. Here's how they can leverage it: + +1. **Developing New Database Systems**: + - **Blueprint for Verifiability**: Use the VDB Framework as a guide to create databases with built-in verifiability features. + - **Implementing Proofs**: Integrate the three types of proofs (Provenance, State Transition, and Query) into the database architecture. + +2. **Enhancing Existing Databases**: + - **Adding Verifiability**: Incorporate VDB concepts into existing database systems to introduce verifiability without overhauling the entire system. 
+ - **Middleware Development**: Create middleware solutions that interface between traditional databases and blockchain systems, embedding VDB principles. + +3. **Building Blockchain Bridges**: + - **Secure Interactions**: Develop interfaces that allow traditional databases to interact securely with blockchain systems. + - **Proof Generation and Verification**: Create APIs and tools that handle the generation and verification of proofs for database operations. + +4. **Creating Development Tools**: + - **Libraries and SDKs**: Develop libraries and software development kits that simplify the implementation of VDB concepts for other developers. + - **Testing Frameworks**: Build testing tools specifically designed for verifiable database systems to ensure correctness and security. + +5. **Optimizing Performance**: + - **Efficient Proof Algorithms**: Research and implement efficient algorithms for generating and verifying proofs to minimize performance overhead. + - **Balancing Verifiability and Speed**: Develop strategies to manage the trade-off between the level of verifiability and system performance. + +6. **Ensuring Compliance**: + - **Data Protection Integration**: Implement features that help businesses comply with data protection regulations by leveraging the privacy features of verifiable databases. + - **Audit Tools**: Create auditing tools that utilize the verifiability features of VDB-based systems to streamline compliance processes. + +By using the VDB Framework, software teams can create robust, verifiable database solutions that offer enhanced security, privacy, and trust, catering to a wide range of applications that require reliable data management. + +## Summary + +The Verifiable Database (VDB) Framework provides a structured approach to integrating blockchain's trustless verifiability with traditional database systems' efficiency and privacy. 
By leveraging cryptographic proofs, VDBs enable businesses and software teams to achieve secure, private, and verifiable data operations without the high costs and complexities of on-chain storage. diff --git a/docs/why.md b/docs/why.md new file mode 100644 index 0000000..f41e5f7 --- /dev/null +++ b/docs/why.md @@ -0,0 +1,119 @@ +# Why zkDB? + +## The Future of Blockchain Development + +Blockchain technology is powerful, but current virtual machines (like EVMs, SVMs, and Move) have limitations. They struggle to offer the same features as traditional web applications. This means we need to look beyond the blockchain for more complex computations. + +We're seeing a shift towards: + +1. **Off-Chain Services**: Using services outside the blockchain to handle advanced tasks. +2. **Simpler Tools**: Creating easier ways to build these off-chain services. +3. **Improved Scalability**: Blockchains focusing on handling more transactions efficiently. +4. **Hybrid Systems**: Developers combining on-chain and off-chain components. + +zkDB is at the forefront of this change, implementing the Verifiable Database (VDB) Framework. It combines secure Merkle trees, efficient off-chain computation, and zero-knowledge proofs. This allows us to build powerful systems beyond the limits of traditional blockchain virtual machines. + +--- + +## What is zkDB? + +zkDB is an implementation of the Verifiable Database (VDB) Framework that uses Merkle trees and the [SP1](https://succinct.xyz) zkVM for secure and efficient operations. It supports `insert`, `query`, and `prove` commands, providing a foundation for verifiable data storage and retrieval. + +## How zkDB Implements the VDB Framework + +zkDB realizes the core properties of the VDB Framework: + +1. **Provenance Proofs**: Uses Merkle inclusion proofs for on-chain data and supports zkTLS for off-chain data sources. +2. **Recursive State Proofs**: Implements efficient state transition proofs using the SP1 zkVM. +3. 
**Query Proofs**: Generates verifiable proofs for all database operations. + +## How zkDB Enhances Merkle Trees + +zkDB adds important features to standard Merkle trees: + +1. **Zero-Knowledge Proofs**: Keeps data private while proving information about it. +2. **Complex Queries**: Handles more than just simple checks. +3. **Full Verifiability**: Every action can be proven to be correct. +4. **Stateless Design**: The entire database can be saved and shared easily. +5. **zkVM Integration**: Allows advanced computations while staying secure. + +These additions turn zkDB into a powerful, privacy-preserving database system suitable for trustless environments. + +## How zkDB is Different from Standard Merkle Trees + +Unlike regular Merkle trees, zkDB: + +- **Uses Zero-Knowledge Proofs**: Proves data operations without revealing the data. +- **Supports Stateless Execution**: The whole state can be serialized and shared. +- **Handles Complex Operations**: Not limited to simple lookups. +- **Offers Verifiable Queries**: Users can verify results without seeing the data. +- **Integrates with zkVM**: Enables advanced computations securely. +- **Generates Proofs for All Operations**: Every action, like insert or query, can produce a proof. +- **Serializable State**: Useful for distributed systems where the state is shared. + +These features make zkDB more than a simple Merkle tree—it's a full implementation of the VDB Framework that leverages zero-knowledge proofs for enhanced privacy and verification. 
+ +## zkDB Data Flow + +The following diagram illustrates the simplified data flow in zkDB: + +```mermaid +graph TD + A[Client] --> B(zkDB) + B --> C{SP1 zkVM} + C --> D[Merkle Tree] + C --> E[zkVM Execution Proof] + D --> F[Merkle Proof] + D --> G[New Merkle Root] + G --> B + E --> H[zkVM Verifier] + F --> I[Merkle Verifier] + B --> A + + classDef process fill:#f9f,stroke:#333,stroke-width:2px + classDef data fill:#bbf,stroke:#333,stroke-width:2px + class B,C process + class D,E,F,G data +``` + +This diagram shows the key steps in the zkDB data flow: + +1. The client interacts with zkDB. +2. zkDB processes the request using the SP1 zkVM. +3. The zkVM executes operations on the Merkle Tree and generates proofs. +4. The Merkle Tree is updated and generates its own proof. +5. Proofs are verified by respective verifiers. +6. zkDB returns the response and proofs to the client. + +This process ensures verifiability at two levels: +- Data integrity and inclusion within the Merkle tree. +- Correctness of computation performed by the zkVM. + +The dual-proof system provides a robust foundation for secure and verifiable data management, combining the strengths of Merkle trees and zero-knowledge proofs. + +## zkDB as an Implementation of the VDB Framework + +The following diagram illustrates how zkDB implements the VDB Framework: + +```mermaid +graph TD + A[VDB Framework] -->|Implements| B[zkDB] + B -->|Uses| C[Merkle Trees] + B -->|Integrates| D[SP1 zkVM] + B -->|Enables| E[Data Privacy] + B -->|Supports| F[Complex Queries] + B -->|Provides| G[Full Verifiability] + B -->|Allows| H[Stateless Operations] + B -->|Implements| I[Advanced Computations] +``` + +This diagram shows how zkDB implements the VDB Framework by: +1. Using Merkle Trees for efficient data storage and proofs +2. Integrating SP1 zkVM for secure computations +3. Enabling Data Privacy through zero-knowledge proofs +4. Supporting Complex Queries beyond simple lookups +5. 
Providing Full Verifiability for all operations +6. Allowing Stateless Operations for flexibility +7. Implementing Advanced Computations securely + +These features make zkDB a powerful implementation of the VDB Framework, suitable for a wide range of applications requiring secure and verifiable data management.