From 25c3a443ced9b2ecf2755678e03bad43b10bf22f Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Sun, 29 Dec 2024 11:05:54 -0500
Subject: [PATCH] Consolidate example to_date.rs into dataframe.rs

---
 datafusion-examples/examples/dataframe.rs | 52 ++++++++++++++++++--
 datafusion-examples/examples/to_date.rs   | 60 -----------------------
 2 files changed, 48 insertions(+), 64 deletions(-)
 delete mode 100644 datafusion-examples/examples/to_date.rs

diff --git a/datafusion-examples/examples/dataframe.rs b/datafusion-examples/examples/dataframe.rs
index 5d5414e3d8b4..90d7d778ea5c 100644
--- a/datafusion-examples/examples/dataframe.rs
+++ b/datafusion-examples/examples/dataframe.rs
@@ -28,16 +28,24 @@ use std::io::Write;
 use std::sync::Arc;
 use tempfile::tempdir;
 
-/// This example demonstrates using DataFusion's DataFrame API to
+/// This example demonstrates using DataFusion's DataFrame API
+///
+/// # Reading from different formats
 ///
 /// * [read_parquet]: execute queries against parquet files
 /// * [read_csv]: execute queries against csv files
 /// * [read_memory]: execute queries against in-memory arrow data
 ///
-/// This example demonstrates the various methods to write out a DataFrame to local storage.
-/// See datafusion-examples/examples/external_dependency/dataframe-to-s3.rs for an example
-/// using a remote object store.
+/// # Writing out to local storage
+///
+/// The following examples demonstrate how to write a DataFrame to local
+/// storage. See `external_dependency/dataframe-to-s3.rs` for an example writing
+/// to a remote object store.
+///
 /// * [write_out]: write out a DataFrame to a table, parquet file, csv file, or json file
+///
+/// # Querying data
+/// * [query_to_date]: convert string columns to dates with the `to_date` function
 #[tokio::main]
 async fn main() -> Result<()> {
     // The SessionContext is the main high level API for interacting with DataFusion
@@ -46,6 +54,7 @@ async fn main() -> Result<()> {
     read_csv(&ctx).await?;
     read_memory(&ctx).await?;
     write_out(&ctx).await?;
+    query_to_date().await?;
     Ok(())
 }
 
@@ -206,3 +215,38 @@ async fn write_out(ctx: &SessionContext) -> std::result::Result<(), DataFusionEr
 
     Ok(())
 }
+
+/// This example demonstrates how to use the to_date series
+/// of functions in the DataFrame API as well as via SQL.
+async fn query_to_date() -> Result<()> {
+    // define a schema.
+    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, false)]));
+
+    // define data.
+    let batch = RecordBatch::try_new(
+        schema,
+        vec![Arc::new(StringArray::from(vec![
+            "2020-09-08T13:42:29Z",
+            "2020-09-08T13:42:29.190855-05:00",
+            "2020-08-09 12:13:29",
+            "2020-01-02",
+        ]))],
+    )?;
+
+    // declare a new context. In the Spark API, this corresponds to a new SparkSession
+    let ctx = SessionContext::new();
+
+    // declare a table in memory. In the Spark API, this corresponds to createDataFrame(...).
+    ctx.register_batch("t", batch)?;
+    let df = ctx.table("t").await?;
+
+    // use the to_date function to convert column 'a' to a date type using the default parsing
+    let df = df.with_column("a", to_date(vec![col("a")]))?;
+
+    let df = df.select_columns(&["a"])?;
+
+    // print the results
+    df.show().await?;
+
+    Ok(())
+}
diff --git a/datafusion-examples/examples/to_date.rs b/datafusion-examples/examples/to_date.rs
deleted file mode 100644
index 99ee555ffc17..000000000000
--- a/datafusion-examples/examples/to_date.rs
+++ /dev/null
@@ -1,60 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::sync::Arc;
-
-use datafusion::arrow::array::StringArray;
-use datafusion::arrow::datatypes::{DataType, Field, Schema};
-use datafusion::arrow::record_batch::RecordBatch;
-use datafusion::error::Result;
-use datafusion::prelude::*;
-
-/// This example demonstrates how to use the to_date series
-/// of functions in the DataFrame API as well as via sql.
-#[tokio::main]
-async fn main() -> Result<()> {
-    // define a schema.
-    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, false)]));
-
-    // define data.
-    let batch = RecordBatch::try_new(
-        schema,
-        vec![Arc::new(StringArray::from(vec![
-            "2020-09-08T13:42:29Z",
-            "2020-09-08T13:42:29.190855-05:00",
-            "2020-08-09 12:13:29",
-            "2020-01-02",
-        ]))],
-    )?;
-
-    // declare a new context. In spark API, this corresponds to a new spark SQLsession
-    let ctx = SessionContext::new();
-
-    // declare a table in memory. In spark API, this corresponds to createDataFrame(...).
-    ctx.register_batch("t", batch)?;
-    let df = ctx.table("t").await?;
-
-    // use to_date function to convert col 'a' to timestamp type using the default parsing
-    let df = df.with_column("a", to_date(vec![col("a")]))?;
-
-    let df = df.select_columns(&["a"])?;
-
-    // print the results
-    df.show().await?;
-
-    Ok(())
-}
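The consolidated example's doc comment notes that to_date is available via SQL as well as through the DataFrame API, but the patch only exercises the DataFrame path. A minimal sketch of the SQL path follows; it is not part of the patch above, and it assumes the same in-memory table `t` with a Utf8 column `a` has already been registered on the SessionContext, as in query_to_date.

// Sketch only: the same conversion expressed through DataFusion's SQL interface.
// Assumes `ctx` is a SessionContext on which the table `t` (with a Utf8 column
// `a`) has been registered, as in query_to_date above.
use datafusion::error::Result;
use datafusion::prelude::*;

async fn query_to_date_sql(ctx: &SessionContext) -> Result<()> {
    // to_date is exposed as a SQL scalar function as well as a DataFrame expression
    let df = ctx.sql("SELECT to_date(a) AS a FROM t").await?;

    // print the results
    df.show().await?;

    Ok(())
}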