Note: This is not an official Apache Software Foundation release.
This crate provides functions to generate the TPCH benchmark dataset for DataFusion using the `tpchgen` crates.
The `datafusion-tpch` crate offers two ways to register the TPCH table functions.
You can register one UDTF per table (for example `tpch_nation`) and generate each table individually.
use datafusion_tpch::register_tpch_udtfs;
#[tokio::main]
async fn main() -> Result<()> {
// create local execution context
let ctx = SessionContext::new();
// Register all the UDTFs.
register_tpch_udtfs(&ctx);
// Generate the nation table with a scale factor of 1.
let df = ctx
.sql(format!("SELECT * FROM tpch_nation(1.0);").as_str())
.await?;
df.show().await?;
Ok(())
}
Or you can register a single UDTF which generates all tables at once.
use datafusion_tpch::register_tpch_udtfs;
#[tokio::main]
async fn main() -> Result<()> {
// create local execution context
let ctx = SessionContext::new();
// Register all the UDTFs.
register_tpch_udtf(&ctx);
// Generate the nation table with a scale factor of 1.
let df = ctx
.sql(format!("SELECT * FROM tpch(1.0);").as_str())
.await?;
df.show().await?;
Ok(())
}
To keep things simple, we don't bundle writing to Parquet in the table provider;
instead, we defer that to the user, who can use the `COPY` command.
use datafusion::prelude::{SessionConfig, SessionContext};
use datafusion_tpch::{register_tpch_udtf, register_tpch_udtfs};

/// Generates TPCH data and writes two of the tables to Parquet files using
/// the SQL `COPY` command.
#[tokio::main]
async fn main() -> datafusion::error::Result<()> {
    // The information schema must be enabled for `SHOW TABLES` to work.
    let ctx = SessionContext::new_with_config(SessionConfig::new().with_information_schema(true));

    // Register the single `tpch` UDTF and invoke it with a scale factor of 1.
    register_tpch_udtf(&ctx);
    let sql_df = ctx.sql("SELECT * FROM tpch(1.0);").await?;
    sql_df.show().await?;

    // List the tables that are now visible in the session.
    let sql_df = ctx.sql("SHOW TABLES;").await?;
    sql_df.show().await?;

    // Write the `nation` table to a Parquet file.
    let sql_df = ctx
        .sql("COPY nation TO './tpch_nation.parquet' STORED AS PARQUET")
        .await?;
    sql_df.show().await?;

    // Alternatively, register the per-table UDTFs and copy straight from a
    // table function call. The file name reflects the scale factor used (1).
    register_tpch_udtfs(&ctx)?;
    let sql_df = ctx
        .sql("COPY (SELECT * FROM tpch_lineitem(1.0)) TO './tpch_lineitem_sf_1.parquet' STORED AS PARQUET")
        .await?;
    sql_df.show().await?;

    Ok(())
}
You can find other examples in the examples directory.
To quickly see the Parquet example in action, you can run the provided example directly from your terminal:
cargo run --example parquet
This project is licensed under the Apache License, Version 2.0.