From bc2092f0080c4eec5e0f2cc7d88e01d097429eea Mon Sep 17 00:00:00 2001
From: Robert Pack
Date: Wed, 3 Jan 2024 12:07:30 +0100
Subject: [PATCH] chore: prune dependencies

---
 .github/workflows/build.yml                   |  2 +-
 Cargo.toml                                    |  3 +-
 crates/deltalake-aws/Cargo.toml               |  6 ++-
 crates/deltalake-aws/src/lib.rs               |  2 +-
 crates/deltalake-aws/src/logstore.rs          |  2 +-
 .../deltalake-aws/tests/integration_read.rs   |  6 +--
 .../tests/repair_s3_rename_test.rs            |  2 +-
 crates/deltalake-catalog-glue/Cargo.toml      |  9 +---
 crates/deltalake-catalog-glue/src/lib.rs      |  5 +-
 crates/deltalake-core/Cargo.toml              | 19 ++-----
 .../src/delta_datafusion/mod.rs               |  2 +-
 .../src/kernel/actions/types.rs               |  2 +-
 .../src/kernel/expressions/mod.rs             |  6 +--
 .../src/kernel/expressions/scalars.rs         |  6 +--
 crates/deltalake-core/src/kernel/schema.rs    |  2 +-
 crates/deltalake-core/src/lib.rs              |  5 --
 crates/deltalake-core/src/logstore/mod.rs     |  2 +-
 .../src/operations/convert_to_delta.rs        |  2 +-
 .../deltalake-core/src/operations/create.rs   |  6 +--
 .../deltalake-core/src/operations/optimize.rs |  2 +-
 crates/deltalake-core/src/operations/write.rs |  2 +-
 .../deltalake-core/src/operations/writer.rs   |  3 +-
 .../src/protocol/checkpoints.rs               |  2 +-
 crates/deltalake-core/src/protocol/mod.rs     |  4 +-
 .../src/protocol/parquet_read/mod.rs          | 51 ++++++++-----------
 crates/deltalake-core/src/storage/file.rs     |  4 +-
 crates/deltalake-core/src/storage/utils.rs    | 41 +--------------
 crates/deltalake-core/src/table/builder.rs    |  6 +--
 crates/deltalake-core/src/table/mod.rs        | 24 ++-------
 crates/deltalake-core/src/writer/json.rs      |  2 +-
 .../deltalake-core/tests/command_optimize.rs  |  4 +-
 .../deltalake-core/tests/command_restore.rs   |  4 +-
 .../tests/commit_info_format.rs               |  3 +-
 .../tests/integration_concurrent_writes.rs    |  2 -
 .../tests/integration_datafusion.rs           |  2 +-
 crates/deltalake-test/Cargo.toml              |  5 +-
 crates/deltalake-test/src/lib.rs              |  4 +-
 crates/deltalake-test/src/utils.rs            |  6 +--
 crates/deltalake/Cargo.toml                   |  8 +--
 .../deltalake/examples/recordbatch-writer.rs  |  2 +-
 delta-inspect/Cargo.toml                      |  2 +-
 python/Cargo.toml                             |  2 +-
 42 files changed, 92 insertions(+), 182 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 8ea03b8661..220c5b21d9 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -48,7 +48,7 @@ jobs:
         run: cargo clippy --features azure,datafusion,s3,gcs,glue --tests

      - name: Spot-check build for native-tls features
-        run: cargo clippy --no-default-features --features azure,datafusion,s3-native-tls,gcs,glue-native-tls --tests
+        run: cargo clippy --no-default-features --features azure,datafusion,s3-native-tls,gcs,glue --tests

      - name: Check docs
        run: cargo doc --features azure,datafusion,s3,gcs,glue
diff --git a/Cargo.toml b/Cargo.toml
index e8f33a7443..32f5e6fe2c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -28,6 +28,7 @@ arrow-ord = { version = "48.0.1" }
 arrow-row = { version = "48.0.1" }
 arrow-schema = { version = "48.0.1" }
 arrow-select = { version = "48.0.1" }
+object_store = { version = "0.7.1" }
 parquet = { version = "48.0.1" }

 # datafusion
@@ -46,7 +47,7 @@ serde_json = "1"
 # "stdlib"
 bytes = { version = "1" }
 chrono = { version = "0.4.31", default-features = false, features = ["clock"] }
-log = { version = "0.4" }
+tracing = { version = "0.1", features = ["log"] }
 regex = { version = "1" }
 thiserror = { version = "1" }
 url = { version = "2" }
diff --git a/crates/deltalake-aws/Cargo.toml b/crates/deltalake-aws/Cargo.toml
index b0f102ce7c..97cd60f1e2 100644
--- a/crates/deltalake-aws/Cargo.toml
+++ b/crates/deltalake-aws/Cargo.toml
@@ -9,13 +9,15 @@ rusoto_core = { version = "0.47", default-features = false, optional = true }
 rusoto_credential = { version = "0.47" }
 rusoto_sts = { version = "0.47", default-features = false, optional = true }
 rusoto_dynamodb = { version = "0.47", default-features = false, optional = true }
-object_store = { version = "0.7.1", features = ["aws"]}
 lazy_static = "1"
 maplit = "1"
+
+# workspace dependencies
 async-trait = { workspace = true }
 bytes = { workspace = true }
 futures = { workspace = true }
-log = { workspace = true }
+tracing = { workspace = true }
+object_store = { workspace = true, features = ["aws"]}
 thiserror = { workspace = true }
 tokio = { workspace = true }
 regex = { workspace = true }
diff --git a/crates/deltalake-aws/src/lib.rs b/crates/deltalake-aws/src/lib.rs
index f6a2b2da31..507b747427 100644
--- a/crates/deltalake-aws/src/lib.rs
+++ b/crates/deltalake-aws/src/lib.rs
@@ -5,7 +5,6 @@ pub mod logstore;
 pub mod storage;

 use lazy_static::lazy_static;
-use log::*;
 use regex::Regex;
 use std::{
     collections::HashMap,
@@ -13,6 +12,7 @@ use std::{
     sync::Arc,
     time::{Duration, SystemTime},
 };
+use tracing::debug;

 use deltalake_core::logstore::{logstores, LogStore, LogStoreFactory};
 use deltalake_core::storage::{factories, url_prefix_handler, ObjectStoreRef, StorageOptions};
diff --git a/crates/deltalake-aws/src/logstore.rs b/crates/deltalake-aws/src/logstore.rs
index 295251c6ca..a9cb4154a9 100644
--- a/crates/deltalake-aws/src/logstore.rs
+++ b/crates/deltalake-aws/src/logstore.rs
@@ -9,7 +9,7 @@ use crate::{constants, CommitEntry, DynamoDbLockClient, UpdateLogEntryResult};

 use bytes::Bytes;
 use deltalake_core::{ObjectStoreError, Path};
-use log::*;
+use tracing::{debug, error, warn};
 use url::Url;

 use deltalake_core::logstore::*;
diff --git a/crates/deltalake-aws/tests/integration_read.rs b/crates/deltalake-aws/tests/integration_read.rs
index 5e9c6f1040..a3aafc1970 100644
--- a/crates/deltalake-aws/tests/integration_read.rs
+++ b/crates/deltalake-aws/tests/integration_read.rs
@@ -88,7 +88,7 @@ async fn read_encoded_table(integration: &IntegrationContext, root_path: &str) -
         .await?;

     assert_eq!(table.version(), 0);
-    assert_eq!(table.get_files().len(), 2);
+    assert_eq!(table.get_files_iter().collect::<Vec<_>>().len(), 2);

     Ok(())
 }
@@ -104,7 +104,7 @@
     assert_eq!(table.protocol().min_writer_version, 2);
     assert_eq!(table.protocol().min_reader_version, 1);
     assert_eq!(
-        table.get_files(),
+        table.get_files_iter().collect::<Vec<_>>(),
         vec![
             Path::from("part-00000-c1777d7d-89d9-4790-b38a-6ee7e24456b1-c000.snappy.parquet"),
             Path::from("part-00001-7891c33d-cedc-47c3-88a6-abcfb049d3b4-c000.snappy.parquet"),
@@ -144,7 +144,7 @@ async fn read_simple_table_with_version(integration: &IntegrationContext) -> Tes
     assert_eq!(table.protocol().min_writer_version, 2);
     assert_eq!(table.protocol().min_reader_version, 1);
     assert_eq!(
-        table.get_files(),
+        table.get_files_iter().collect::<Vec<_>>(),
         vec![
             Path::from("part-00000-c1777d7d-89d9-4790-b38a-6ee7e24456b1-c000.snappy.parquet"),
             Path::from("part-00001-7891c33d-cedc-47c3-88a6-abcfb049d3b4-c000.snappy.parquet"),
diff --git a/crates/deltalake-aws/tests/repair_s3_rename_test.rs b/crates/deltalake-aws/tests/repair_s3_rename_test.rs
index a48af20d0f..dac9206359 100644
--- a/crates/deltalake-aws/tests/repair_s3_rename_test.rs
+++ b/crates/deltalake-aws/tests/repair_s3_rename_test.rs
@@ -7,7 +7,7 @@ use deltalake_core::storage::object_store::{
     ObjectMeta, Result as ObjectStoreResult,
 };
 use deltalake_core::{DeltaTableBuilder, ObjectStore, Path};
-use deltalake_test::utils::{IntegrationContext, StorageIntegration};
+use deltalake_test::utils::IntegrationContext;
 use futures::stream::BoxStream;
 use serial_test::serial;
 use std::ops::Range;
diff --git a/crates/deltalake-catalog-glue/Cargo.toml b/crates/deltalake-catalog-glue/Cargo.toml
index aee12726e6..1c9888981a 100644
--- a/crates/deltalake-catalog-glue/Cargo.toml
+++ b/crates/deltalake-catalog-glue/Cargo.toml
@@ -5,17 +5,12 @@ edition = "2021"

 [dependencies]
 async-trait = { workspace = true }
-aws-config = "0.57.1"
-aws-sdk-glue = "0.35.0"
+aws-config = "1"
+aws-sdk-glue = "1"
 deltalake-core = { path = "../deltalake-core" }
 # This can depend on a lowest common denominator of core once that's released
 # deltalake_core = { version = "0.17.0" }
-log = "0.4"
 thiserror = { workspace = true }

 [dev-dependencies]
 tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
-
-[features]
-default = []
-native-tls = []
diff --git a/crates/deltalake-catalog-glue/src/lib.rs b/crates/deltalake-catalog-glue/src/lib.rs
index 32894cc36e..e9ef449be2 100644
--- a/crates/deltalake-catalog-glue/src/lib.rs
+++ b/crates/deltalake-catalog-glue/src/lib.rs
@@ -1,8 +1,7 @@
 //! Glue Data Catalog.
 //!
-use aws_config::SdkConfig;
+use aws_config::{BehaviorVersion, SdkConfig};
 use deltalake_core::data_catalog::{DataCatalog, DataCatalogError};
-use log::*;

 #[derive(thiserror::Error, Debug)]
 pub enum GlueError {
@@ -38,7 +37,7 @@ pub struct GlueDataCatalog {
 impl GlueDataCatalog {
     /// Creates a new GlueDataCatalog with environmental configuration
     pub async fn from_env() -> Result<Self, GlueError> {
-        let config = aws_config::load_from_env().await;
+        let config = aws_config::load_defaults(BehaviorVersion::latest()).await;
         let client = aws_sdk_glue::Client::new(&config);
         Ok(Self { client })
     }
diff --git a/crates/deltalake-core/Cargo.toml b/crates/deltalake-core/Cargo.toml
index 8ace57f36c..cb9e250e8f 100644
--- a/crates/deltalake-core/Cargo.toml
+++ b/crates/deltalake-core/Cargo.toml
@@ -68,13 +68,13 @@ tokio = { workspace = true, features = [

 # other deps (these should be organized and pulled into workspace.dependencies as necessary)
 cfg-if = "1"
+dashmap = "5"
 errno = "0.3"
 either = "1.8"
 fix-hidden-lifetime-bug = "0.2"
 hyper = { version = "0.14", optional = true }
-itertools = "0.11"
+itertools = "0.12"
 lazy_static = "1"
-log = "0"
 libc = ">=0.2.90, <1"
 num-bigint = "0.4"
 num-traits = "0.2.15"
@@ -83,7 +83,7 @@ once_cell = "1.16.0"
 parking_lot = "0.12"
 percent-encoding = "2"
 roaring = "0.10.1"
-tracing = { version = "0.1", optional = true }
+tracing = { workspace = true }
 rand = "0.8"
 z85 = "3.0.5"
 maplit = "1"
@@ -93,16 +93,8 @@ reqwest = { version = "0.11.18", default-features = false, features = [
     "rustls-tls",
     "json",
 ], optional = true }
-
-# Datafusion
-dashmap = "5"
-
 sqlparser = { version = "0.39", optional = true }

-# NOTE dependencies only for integration tests
-fs_extra = { version = "1.3.0", optional = true }
-tempdir = { version = "0", optional = true }
-
 [dev-dependencies]
 criterion = "0.5"
 ctor = "0"
@@ -114,7 +106,6 @@ pretty_assertions = "1.2.1"
 pretty_env_logger = "*"
 rand = "0.8"
 serial_test = "2"
-tempdir = "0"
 tempfile = "3"
 tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
 utime = "0.3"
@@ -147,10 +138,10 @@ datafusion = [
 datafusion-ext = ["datafusion"]
 gcs = ["object_store/gcp"]
 # used only for integration testing
-integration_test = ["fs_extra", "tempdir"]
+integration_test = []
["parquet/json"] python = ["arrow/pyarrow"] -unity-experimental = ["reqwest", "tracing", "hyper"] +unity-experimental = ["reqwest", "hyper"] [[bench]] name = "read_checkpoint" diff --git a/crates/deltalake-core/src/delta_datafusion/mod.rs b/crates/deltalake-core/src/delta_datafusion/mod.rs index bfd268fc77..66a65811f4 100644 --- a/crates/deltalake-core/src/delta_datafusion/mod.rs +++ b/crates/deltalake-core/src/delta_datafusion/mod.rs @@ -71,9 +71,9 @@ use datafusion_sql::planner::ParserOptions; use futures::TryStreamExt; use itertools::Itertools; -use log::error; use object_store::ObjectMeta; use serde::{Deserialize, Serialize}; +use tracing::error; use url::Url; use crate::errors::{DeltaResult, DeltaTableError}; diff --git a/crates/deltalake-core/src/kernel/actions/types.rs b/crates/deltalake-core/src/kernel/actions/types.rs index 67a94ec1c4..3dc177fb5b 100644 --- a/crates/deltalake-core/src/kernel/actions/types.rs +++ b/crates/deltalake-core/src/kernel/actions/types.rs @@ -5,8 +5,8 @@ use std::str::FromStr; // use std::sync::Arc; // use roaring::RoaringTreemap; -use log::warn; use serde::{Deserialize, Serialize}; +use tracing::warn; use url::Url; use super::super::schema::StructType; diff --git a/crates/deltalake-core/src/kernel/expressions/mod.rs b/crates/deltalake-core/src/kernel/expressions/mod.rs index 80c0a72cf3..ea02d08339 100644 --- a/crates/deltalake-core/src/kernel/expressions/mod.rs +++ b/crates/deltalake-core/src/kernel/expressions/mod.rs @@ -1,9 +1,7 @@ //! expressions. -use std::{ - collections::HashSet, - fmt::{Display, Formatter}, -}; +use std::collections::HashSet; +use std::fmt::{Display, Formatter}; use self::scalars::Scalar; diff --git a/crates/deltalake-core/src/kernel/expressions/scalars.rs b/crates/deltalake-core/src/kernel/expressions/scalars.rs index 175470e19e..90c71c0664 100644 --- a/crates/deltalake-core/src/kernel/expressions/scalars.rs +++ b/crates/deltalake-core/src/kernel/expressions/scalars.rs @@ -1,9 +1,7 @@ //! Scalar values for use in expressions. 

-use std::{
-    cmp::Ordering,
-    fmt::{Display, Formatter},
-};
+use std::cmp::Ordering;
+use std::fmt::{Display, Formatter};

 use crate::kernel::schema::{DataType, PrimitiveType};

diff --git a/crates/deltalake-core/src/kernel/schema.rs b/crates/deltalake-core/src/kernel/schema.rs
index 08cf991dd5..e8713b474d 100644
--- a/crates/deltalake-core/src/kernel/schema.rs
+++ b/crates/deltalake-core/src/kernel/schema.rs
@@ -6,11 +6,11 @@ use std::hash::{Hash, Hasher};
 use std::sync::Arc;
 use std::{collections::HashMap, fmt::Display};

-use crate::kernel::DataCheck;
 use serde::{Deserialize, Serialize};
 use serde_json::Value;

 use super::error::Error;
+use crate::kernel::DataCheck;

 /// Type alias for a top level schema
 pub type Schema = StructType;
diff --git a/crates/deltalake-core/src/lib.rs b/crates/deltalake-core/src/lib.rs
index 2bd96a5d21..76928bdb5f 100644
--- a/crates/deltalake-core/src/lib.rs
+++ b/crates/deltalake-core/src/lib.rs
@@ -69,11 +69,6 @@
 #![allow(rustdoc::invalid_html_tags)]
 #![allow(clippy::nonminimal_bool)]

-#[cfg(all(feature = "glue", feature = "glue-native-tls"))]
-compile_error!(
-    "Features glue and glue-native-tls are mutually exclusive and cannot be enabled together"
-);
-
 pub mod data_catalog;
 pub mod errors;
 pub mod kernel;
diff --git a/crates/deltalake-core/src/logstore/mod.rs b/crates/deltalake-core/src/logstore/mod.rs
index c67ef3ac0e..1571c5b654 100644
--- a/crates/deltalake-core/src/logstore/mod.rs
+++ b/crates/deltalake-core/src/logstore/mod.rs
@@ -22,8 +22,8 @@ use crate::{
     DeltaTableError,
 };
 use bytes::Bytes;
-use log::*;
 use object_store::{path::Path, Error as ObjectStoreError, ObjectStore};
+use tracing::{debug, warn};

 #[cfg(feature = "datafusion")]
 use datafusion::datasource::object_store::ObjectStoreUrl;
diff --git a/crates/deltalake-core/src/operations/convert_to_delta.rs b/crates/deltalake-core/src/operations/convert_to_delta.rs
index 48dc90b2dc..4920b049dc 100644
--- a/crates/deltalake-core/src/operations/convert_to_delta.rs
+++ b/crates/deltalake-core/src/operations/convert_to_delta.rs
@@ -16,7 +16,6 @@ use futures::{
     future::{self, BoxFuture},
     TryStreamExt,
 };
-use log::{debug, info};
 use parquet::{
     arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder},
     errors::ParquetError,
@@ -29,6 +28,7 @@ use std::{
     str::{FromStr, Utf8Error},
     sync::Arc,
 };
+use tracing::{debug, info};

 /// Error converting a Parquet table to a Delta table
 #[derive(Debug, thiserror::Error)]
diff --git a/crates/deltalake-core/src/operations/create.rs b/crates/deltalake-core/src/operations/create.rs
index 0e44fe215f..57df1fe90a 100644
--- a/crates/deltalake-core/src/operations/create.rs
+++ b/crates/deltalake-core/src/operations/create.rs
@@ -330,7 +330,7 @@ mod tests {
     use crate::operations::DeltaOps;
     use crate::table::config::DeltaConfigKey;
     use crate::writer::test_utils::get_delta_schema;
-    use tempdir::TempDir;
+    use tempfile::TempDir;

     #[tokio::test]
     async fn test_create() {
@@ -349,7 +349,7 @@
     #[tokio::test]
     async fn test_create_local_relative_path() {
         let table_schema = get_delta_schema();
-        let tmp_dir = TempDir::new_in(".", "tmp_").unwrap();
+        let tmp_dir = TempDir::new_in(".").unwrap();
         let relative_path = format!(
             "./{}",
             tmp_dir.path().file_name().unwrap().to_str().unwrap()
@@ -369,7 +369,7 @@
     #[tokio::test]
     async fn test_create_table_local_path() {
         let schema = get_delta_schema();
-        let tmp_dir = TempDir::new_in(".", "tmp_").unwrap();
+        let tmp_dir = TempDir::new_in(".").unwrap();
         let relative_path = format!(
             "./{}",
             tmp_dir.path().file_name().unwrap().to_str().unwrap()
diff --git a/crates/deltalake-core/src/operations/optimize.rs b/crates/deltalake-core/src/operations/optimize.rs
index 24ecb8e853..7cf523e09e 100644
--- a/crates/deltalake-core/src/operations/optimize.rs
+++ b/crates/deltalake-core/src/operations/optimize.rs
@@ -30,13 +30,13 @@ use futures::future::BoxFuture;
 use futures::stream::BoxStream;
 use futures::{Future, StreamExt, TryStreamExt};
 use itertools::Itertools;
-use log::debug;
 use num_cpus;
 use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder};
 use parquet::basic::{Compression, ZstdLevel};
 use parquet::errors::ParquetError;
 use parquet::file::properties::WriterProperties;
 use serde::{Deserialize, Serialize};
+use tracing::debug;

 use super::transaction::{commit, PROTOCOL};
 use super::writer::{PartitionWriter, PartitionWriterConfig};
diff --git a/crates/deltalake-core/src/operations/write.rs b/crates/deltalake-core/src/operations/write.rs
index 6da3b18ecb..b9ec348420 100644
--- a/crates/deltalake-core/src/operations/write.rs
+++ b/crates/deltalake-core/src/operations/write.rs
@@ -920,7 +920,7 @@ mod tests {

     #[tokio::test]
     async fn test_special_characters_write_read() {
-        let tmp_dir = tempdir::TempDir::new("test").unwrap();
+        let tmp_dir = tempfile::tempdir().unwrap();
         let tmp_path = std::fs::canonicalize(tmp_dir.path()).unwrap();

         let schema = Arc::new(ArrowSchema::new(vec![
diff --git a/crates/deltalake-core/src/operations/writer.rs b/crates/deltalake-core/src/operations/writer.rs
index 6d551ecb96..8b31f9c252 100644
--- a/crates/deltalake-core/src/operations/writer.rs
+++ b/crates/deltalake-core/src/operations/writer.rs
@@ -10,6 +10,7 @@ use object_store::{path::Path, ObjectStore};
 use parquet::arrow::ArrowWriter;
 use parquet::basic::Compression;
 use parquet::file::properties::WriterProperties;
+use tracing::debug;

 use crate::crate_version;
 use crate::errors::{DeltaResult, DeltaTableError};
@@ -380,7 +381,7 @@ impl PartitionWriter {
             self.write_batch(&batch.slice(offset, length))?;
             // flush currently buffered data to disk once we meet or exceed the target file size.
             if self.buffer.len() >= self.config.target_file_size {
-                log::debug!("Writing file with size {:?} to disk.", self.buffer.len());
+                debug!("Writing file with size {:?} to disk.", self.buffer.len());
                 self.flush_arrow_writer().await?;
             }
         }
diff --git a/crates/deltalake-core/src/protocol/checkpoints.rs b/crates/deltalake-core/src/protocol/checkpoints.rs
index af695b662e..b7e38582fe 100644
--- a/crates/deltalake-core/src/protocol/checkpoints.rs
+++ b/crates/deltalake-core/src/protocol/checkpoints.rs
@@ -10,12 +10,12 @@ use arrow_schema::{ArrowError, Schema as ArrowSchema};
 use chrono::{Datelike, Utc};
 use futures::{StreamExt, TryStreamExt};
 use lazy_static::lazy_static;
-use log::*;
 use object_store::ObjectStore;
 use parquet::arrow::ArrowWriter;
 use parquet::errors::ParquetError;
 use regex::Regex;
 use serde_json::Value;
+use tracing::{debug, error};

 use super::{time_utils, ProtocolError};
 use crate::kernel::arrow::delta_log_schema_for_table;
diff --git a/crates/deltalake-core/src/protocol/mod.rs b/crates/deltalake-core/src/protocol/mod.rs
index 53b2f471d1..c16721ad2c 100644
--- a/crates/deltalake-core/src/protocol/mod.rs
+++ b/crates/deltalake-core/src/protocol/mod.rs
@@ -12,7 +12,6 @@ mod time_utils;
 use arrow_schema::ArrowError;
 use futures::StreamExt;
 use lazy_static::lazy_static;
-use log::debug;
 use object_store::{path::Path, Error as ObjectStoreError, ObjectStore};
 use regex::Regex;
 use serde::{Deserialize, Serialize};
@@ -22,6 +21,7 @@ use std::collections::HashMap;
 use std::hash::{Hash, Hasher};
 use std::mem::take;
 use std::str::FromStr;
+use tracing::{debug, error};

 use crate::errors::{DeltaResult, DeltaTableError};
 use crate::kernel::{Add, CommitInfo, Metadata, Protocol, Remove};
@@ -266,7 +266,7 @@ impl Add {
             Ok(Some(stats)) => Ok(Some(stats)),
             Ok(None) => self.get_json_stats(),
             Err(e) => {
-                log::error!(
+                error!(
Attempting to read json stats", self.stats_parsed ); diff --git a/crates/deltalake-core/src/protocol/parquet_read/mod.rs b/crates/deltalake-core/src/protocol/parquet_read/mod.rs index ecad3b7865..d6f0ac7979 100644 --- a/crates/deltalake-core/src/protocol/parquet_read/mod.rs +++ b/crates/deltalake-core/src/protocol/parquet_read/mod.rs @@ -5,6 +5,7 @@ use num_bigint::BigInt; use num_traits::cast::ToPrimitive; use parquet::record::{Field, ListAccessor, MapAccessor, RowAccessor}; use serde_json::json; +use tracing::{debug, error, warn}; use crate::kernel::serde_path::decode_path; use crate::kernel::{ @@ -87,10 +88,9 @@ impl DeletionVectorDescriptor { })?; } _ => { - log::debug!( + debug!( "Unexpected field name `{}` for deletion vector: {:?}", - name, - record + name, record ); } } @@ -208,10 +208,9 @@ impl Add { } }, _ => { - log::debug!( + debug!( "Unexpected field name `{}` for add action: {:?}", - name, - record + name, record ); } } @@ -231,7 +230,7 @@ impl Add { "numRecords" => if let Ok(v) = record.get_long(i) { stats.num_records = v; } else { - log::error!("Expect type of stats_parsed field numRecords to be long, got: {}", record); + error!("Expect type of stats_parsed field numRecords to be long, got: {}", record); } "minValues" => if let Ok(row) = record.get_group(i) { for (name, field) in row.get_column_iter() { @@ -242,7 +241,7 @@ impl Add { } } } else { - log::error!("Expect type of stats_parsed field minRecords to be struct, got: {}", record); + error!("Expect type of stats_parsed field minRecords to be struct, got: {}", record); } "maxValues" => if let Ok(row) = record.get_group(i) { for (name, field) in row.get_column_iter() { @@ -253,7 +252,7 @@ impl Add { } } } else { - log::error!("Expect type of stats_parsed field maxRecords to be struct, got: {}", record); + error!("Expect type of stats_parsed field maxRecords to be struct, got: {}", record); } "nullCount" => if let Ok(row) = record.get_group(i) { for (name, field) in row.get_column_iter() { @@ -264,10 +263,10 @@ impl Add { } } } else { - log::error!("Expect type of stats_parsed field nullCount to be struct, got: {}", record); + error!("Expect type of stats_parsed field nullCount to be struct, got: {}", record); } _ => { - log::debug!( + debug!( "Unexpected field name `{}` for stats_parsed: {:?}", name, record, @@ -293,10 +292,9 @@ fn field_to_value_stat(field: &Field, field_name: &str) -> Option Option Some(ColumnCountStat::Value(*value)), _ => { - log::warn!( + warn!( "Unexpected type when parsing nullCounts for {}. 
Found {}", - field_name, - field + field_name, field ); None } @@ -488,10 +485,9 @@ impl Metadata { } } _ => { - log::debug!( + debug!( "Unexpected field name `{}` for metaData action: {:?}", - name, - record + name, record ); } } @@ -579,10 +575,9 @@ impl Remove { } "numRecords" => {} _ => { - log::debug!( + debug!( "Unexpected field name `{}` for remove action: {:?}", - name, - record + name, record ); } } @@ -615,10 +610,9 @@ impl Txn { re.last_updated = record.get_long(i).map(Some).unwrap_or(None); } _ => { - log::debug!( + debug!( "Unexpected field name `{}` for txn action: {:?}", - name, - record + name, record ); } } @@ -662,10 +656,9 @@ impl Protocol { .ok() } _ => { - log::debug!( + debug!( "Unexpected field name `{}` for protocol action: {:?}", - name, - record + name, record ); } } diff --git a/crates/deltalake-core/src/storage/file.rs b/crates/deltalake-core/src/storage/file.rs index 6e64e52be9..3af69e164d 100644 --- a/crates/deltalake-core/src/storage/file.rs +++ b/crates/deltalake-core/src/storage/file.rs @@ -379,9 +379,9 @@ mod tests { use std::io::Write; use std::path::{Path, PathBuf}; - #[tokio::test()] + #[tokio::test] async fn test_rename_noreplace() { - let tmp_dir = tempdir::TempDir::new_in(".", "test_rename_noreplace").unwrap(); + let tmp_dir = tempfile::tempdir().unwrap(); let a = create_file(tmp_dir.path(), "a"); let b = create_file(tmp_dir.path(), "b"); let c = &tmp_dir.path().join("c"); diff --git a/crates/deltalake-core/src/storage/utils.rs b/crates/deltalake-core/src/storage/utils.rs index 39f052a89e..46cc230309 100644 --- a/crates/deltalake-core/src/storage/utils.rs +++ b/crates/deltalake-core/src/storage/utils.rs @@ -1,51 +1,12 @@ //! Utility functions for working across Delta tables -use std::collections::HashMap; -use std::sync::Arc; - use chrono::{NaiveDateTime, TimeZone, Utc}; -use futures::{StreamExt, TryStreamExt}; +use futures::TryStreamExt; use object_store::path::Path; use object_store::{DynObjectStore, ObjectMeta, Result as ObjectStoreResult}; use crate::errors::{DeltaResult, DeltaTableError}; use crate::kernel::Add; -use crate::table::builder::DeltaTableBuilder; - -/// Copies the contents from the `from` location into the `to` location -pub async fn copy_table( - from: impl AsRef, - from_options: Option>, - to: impl AsRef, - to_options: Option>, - allow_http: bool, -) -> Result<(), DeltaTableError> { - let from_store = DeltaTableBuilder::from_uri(from) - .with_storage_options(from_options.unwrap_or_default()) - .with_allow_http(allow_http) - .build_storage()?; - let to_store = DeltaTableBuilder::from_uri(to) - .with_storage_options(to_options.unwrap_or_default()) - .with_allow_http(allow_http) - .build_storage()?; - sync_stores(from_store.object_store(), to_store.object_store()).await -} - -/// Synchronize the contents of two object stores -pub async fn sync_stores( - from_store: Arc, - to_store: Arc, -) -> Result<(), DeltaTableError> { - // TODO if a table is copied within the same root store (i.e bucket), using copy would be MUCH more efficient - let mut meta_stream = from_store.list(None).await?; - while let Some(file) = meta_stream.next().await { - if let Ok(meta) = file { - let bytes = from_store.get(&meta.location).await?.bytes().await?; - to_store.put(&meta.location, bytes).await?; - } - } - Ok(()) -} /// Collect list stream pub async fn flatten_list_stream( diff --git a/crates/deltalake-core/src/table/builder.rs b/crates/deltalake-core/src/table/builder.rs index 87c321f9c5..5c8a2ec379 100644 --- a/crates/deltalake-core/src/table/builder.rs 
+++ b/crates/deltalake-core/src/table/builder.rs
@@ -5,9 +5,9 @@ use std::path::PathBuf;
 use std::sync::Arc;

 use chrono::{DateTime, FixedOffset, Utc};
-use log::*;
 use object_store::DynObjectStore;
 use serde::{Deserialize, Serialize};
+use tracing::debug;
 use url::Url;

 use super::DeltaTable;
@@ -509,7 +509,7 @@ mod tests {

     #[test]
     fn test_ensure_table_uri_path() {
-        let tmp_dir = tempdir::TempDir::new("test").unwrap();
+        let tmp_dir = tempfile::tempdir().unwrap();
         let tmp_path = std::fs::canonicalize(tmp_dir.path()).unwrap();
         let paths = &[
             tmp_path.join("data/delta-0.8.0"),
@@ -539,7 +539,7 @@ mod tests {
         let url = ensure_table_uri(&expected).unwrap();
         assert_eq!(expected, url);

-        let tmp_dir = tempdir::TempDir::new("test").unwrap();
+        let tmp_dir = tempfile::tempdir().unwrap();
         let tmp_path = std::fs::canonicalize(tmp_dir.path()).unwrap();
         let path = tmp_path.join("data/delta-0.8.0");
         let expected = Url::from_directory_path(path).unwrap();
diff --git a/crates/deltalake-core/src/table/mod.rs b/crates/deltalake-core/src/table/mod.rs
index 018e79ebe9..9dc748ba04 100644
--- a/crates/deltalake-core/src/table/mod.rs
+++ b/crates/deltalake-core/src/table/mod.rs
@@ -9,12 +9,12 @@ use std::{cmp::max, cmp::Ordering, collections::HashSet};
 use chrono::{DateTime, Utc};
 use futures::StreamExt;
 use lazy_static::lazy_static;
-use log::debug;
 use object_store::{path::Path, Error as ObjectStoreError, ObjectStore};
 use regex::Regex;
 use serde::de::{Error, SeqAccess, Visitor};
 use serde::ser::SerializeSeq;
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
+use tracing::debug;
 use uuid::Uuid;

 use self::builder::DeltaTableConfig;
@@ -931,7 +931,7 @@ impl std::fmt::Debug for DeltaTable {
 #[cfg(test)]
 mod tests {
     use pretty_assertions::assert_eq;
-    use tempdir::TempDir;
+    use tempfile::TempDir;

     use super::*;
     use crate::kernel::{DataType, PrimitiveType, StructField};
@@ -975,24 +975,6 @@ mod tests {
         drop(tmp_dir);
     }

-    /* TODO move into deltalake-aws crate
-    #[cfg(any(feature = "s3", feature = "s3-native-tls"))]
-    #[test]
-    fn normalize_table_uri_s3() {
-        std::env::set_var("AWS_DEFAULT_REGION", "us-east-1");
-        for table_uri in [
-            "s3://tests/data/delta-0.8.0/",
-            "s3://tests/data/delta-0.8.0//",
-            "s3://tests/data/delta-0.8.0",
-        ]
-        .iter()
-        {
-            let table = crate::DeltaTableBuilder::from_uri(table_uri).build().unwrap();
-            assert_eq!(table.table_uri(), "s3://tests/data/delta-0.8.0");
-        }
-    }
-    */
-
     #[test]
     fn get_table_constraints() {
         let state = DeltaTableMetaData::new(
@@ -1015,7 +997,7 @@ mod tests {
     }

     async fn create_test_table() -> (DeltaTable, TempDir) {
-        let tmp_dir = TempDir::new("create_table_test").unwrap();
+        let tmp_dir = tempfile::tempdir().unwrap();
         let table_dir = tmp_dir.path().join("test_create");
         std::fs::create_dir(&table_dir).unwrap();

diff --git a/crates/deltalake-core/src/writer/json.rs b/crates/deltalake-core/src/writer/json.rs
index 0b970ae6d7..71976afc38 100644
--- a/crates/deltalake-core/src/writer/json.rs
+++ b/crates/deltalake-core/src/writer/json.rs
@@ -6,7 +6,6 @@ use std::sync::Arc;
 use arrow::datatypes::{Schema as ArrowSchema, SchemaRef as ArrowSchemaRef};
 use arrow::record_batch::*;
 use bytes::Bytes;
-use log::{info, warn};
 use object_store::path::Path;
 use object_store::ObjectStore;
 use parquet::{
@@ -14,6 +13,7 @@ use parquet::{
     file::properties::WriterProperties,
 };
 use serde_json::Value;
+use tracing::{info, warn};
 use uuid::Uuid;

 use super::stats::create_add;
diff --git a/crates/deltalake-core/tests/command_optimize.rs b/crates/deltalake-core/tests/command_optimize.rs
index 468f3f279f..75ac89b519 100644
--- a/crates/deltalake-core/tests/command_optimize.rs
+++ b/crates/deltalake-core/tests/command_optimize.rs
@@ -24,7 +24,7 @@ use parquet::arrow::ParquetRecordBatchStreamBuilder;
 use parquet::file::properties::WriterProperties;
 use rand::prelude::*;
 use serde_json::json;
-use tempdir::TempDir;
+use tempfile::TempDir;

 struct Context {
     pub tmp_dir: TempDir,
@@ -56,7 +56,7 @@ async fn setup_test(partitioned: bool) -> Result<Context, Box<dyn Error>> {
         vec![]
     };

-    let tmp_dir = tempdir::TempDir::new("opt_table").unwrap();
+    let tmp_dir = tempfile::tempdir().unwrap();
     let table_uri = tmp_dir.path().to_str().to_owned().unwrap();
     let dt = DeltaOps::try_from_uri(table_uri)
         .await?
diff --git a/crates/deltalake-core/tests/command_restore.rs b/crates/deltalake-core/tests/command_restore.rs
index fd8d77c8b9..9b77662ce4 100644
--- a/crates/deltalake-core/tests/command_restore.rs
+++ b/crates/deltalake-core/tests/command_restore.rs
@@ -14,7 +14,7 @@ use std::fs;
 use std::sync::Arc;
 use std::thread;
 use std::time::Duration;
-use tempdir::TempDir;
+use tempfile::TempDir;

 #[derive(Debug)]
 struct Context {
@@ -36,7 +36,7 @@ async fn setup_test() -> Result<Context, Box<dyn Error>> {
         ),
     ];

-    let tmp_dir = tempdir::TempDir::new("restore_table").unwrap();
+    let tmp_dir = tempfile::tempdir().unwrap();
     let table_uri = tmp_dir.path().to_str().to_owned().unwrap();
     let table = DeltaOps::try_from_uri(table_uri)
         .await?
diff --git a/crates/deltalake-core/tests/commit_info_format.rs b/crates/deltalake-core/tests/commit_info_format.rs
index a9d05e4c11..b0e9d2d324 100644
--- a/crates/deltalake-core/tests/commit_info_format.rs
+++ b/crates/deltalake-core/tests/commit_info_format.rs
@@ -6,11 +6,10 @@ use deltalake_core::operations::transaction::commit;
 use deltalake_core::protocol::{DeltaOperation, SaveMode};
 use serde_json::json;
 use std::error::Error;
-use tempdir::TempDir;

 #[tokio::test]
 async fn test_operational_parameters() -> Result<(), Box<dyn Error>> {
-    let path = TempDir::new("operational_parameters").unwrap();
+    let path = tempfile::tempdir().unwrap();
     let mut table = fs_common::create_table(path.path().to_str().unwrap(), None).await;
     let add = fs_common::add(0);

diff --git a/crates/deltalake-core/tests/integration_concurrent_writes.rs b/crates/deltalake-core/tests/integration_concurrent_writes.rs
index 4e66a9f93f..ed941c2729 100644
--- a/crates/deltalake-core/tests/integration_concurrent_writes.rs
+++ b/crates/deltalake-core/tests/integration_concurrent_writes.rs
@@ -1,7 +1,5 @@
 #![cfg(feature = "integration_test")]

-use log::*;
-
 use deltalake_core::kernel::{Action, Add, DataType, PrimitiveType, StructField, StructType};
 use deltalake_core::operations::transaction::commit;
 use deltalake_core::operations::DeltaOps;
diff --git a/crates/deltalake-core/tests/integration_datafusion.rs b/crates/deltalake-core/tests/integration_datafusion.rs
index 25a3fddbce..45e2a41f6d 100644
--- a/crates/deltalake-core/tests/integration_datafusion.rs
+++ b/crates/deltalake-core/tests/integration_datafusion.rs
@@ -1130,7 +1130,7 @@ mod date_partitions {
             ),
         ];

-        let tmp_dir = tempdir::TempDir::new("opt_table").unwrap();
+        let tmp_dir = tempfile::tempdir().unwrap();
         let table_uri = tmp_dir.path().to_str().to_owned().unwrap();
         let dt = DeltaOps::try_from_uri(table_uri)
             .await?
diff --git a/crates/deltalake-test/Cargo.toml b/crates/deltalake-test/Cargo.toml
index ea00793a3a..e41ad6e3ce 100644
--- a/crates/deltalake-test/Cargo.toml
+++ b/crates/deltalake-test/Cargo.toml
@@ -13,11 +13,8 @@ fs_extra = "1.3.0"
 rand = "0.8"
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
-tempdir = "0"
 tempfile = "3"

 [features]
 default = []
-datafusion = [
-    "deltalake-core/datafusion",
-]
+datafusion = ["deltalake-core/datafusion"]
diff --git a/crates/deltalake-test/src/lib.rs b/crates/deltalake-test/src/lib.rs
index 3ed261b672..5a698305d6 100644
--- a/crates/deltalake-test/src/lib.rs
+++ b/crates/deltalake-test/src/lib.rs
@@ -12,7 +12,7 @@ use deltalake_core::{ObjectStore, Path};
 use std::any::Any;
 use std::collections::HashMap;
 use std::sync::Arc;
-use tempdir::TempDir;
+use tempfile::TempDir;

 pub mod clock;
 #[cfg(feature = "datafusion")]
@@ -88,7 +88,7 @@ impl TestContext {
 }

 pub async fn setup_local_context() -> TestContext {
-    let tmp_dir = tempdir::TempDir::new("delta-rs_tests").unwrap();
+    let tmp_dir = tempfile::tempdir().unwrap();
     let mut config = HashMap::new();
     config.insert(
         "URI".to_owned(),
diff --git a/crates/deltalake-test/src/utils.rs b/crates/deltalake-test/src/utils.rs
index 8468bc5728..0451d7c751 100644
--- a/crates/deltalake-test/src/utils.rs
+++ b/crates/deltalake-test/src/utils.rs
@@ -5,7 +5,7 @@ use fs_extra::dir::{copy, CopyOptions};
 use std::collections::HashMap;
 use std::env;
 use std::process::ExitStatus;
-use tempdir::TempDir;
+use tempfile::{tempdir, TempDir};

 pub type TestResult = Result<(), Box<dyn std::error::Error + 'static>>;

@@ -31,7 +31,7 @@ pub struct LocalStorageIntegration {
 impl Default for LocalStorageIntegration {
     fn default() -> Self {
         Self {
-            tmp_dir: TempDir::new("").expect("Failed to make temp dir"),
+            tmp_dir: tempdir().expect("Failed to make temp dir"),
         }
     }
 }
@@ -81,7 +81,7 @@ impl IntegrationContext {

         integration.prepare_env();

-        let tmp_dir = TempDir::new("")?;
+        let tmp_dir = tempdir()?;
         // create a fresh bucket in every context. THis is done via CLI...
         integration.create_bucket()?;
         let store = integration.object_store()?;
diff --git a/crates/deltalake/Cargo.toml b/crates/deltalake/Cargo.toml
index 41df0c4565..9cd0789567 100644
--- a/crates/deltalake/Cargo.toml
+++ b/crates/deltalake/Cargo.toml
@@ -27,20 +27,20 @@ datafusion = ["deltalake-core/datafusion"]
 datafusion-ext = ["datafusion"]
 gcs = ["deltalake-core/gcs"]
 glue = ["deltalake-catalog-glue"]
-glue-native-tls = ["deltalake-catalog-glue/native-tls"]
 hdfs = []
-# used only for integration testing
-integration_test = ["deltalake-core/integration_test"]
 json = ["deltalake-core/json"]
 python = ["deltalake-core/python"]
 s3-native-tls = ["deltalake-aws/native-tls"]
 s3 = ["deltalake-aws/rustls"]
 unity-experimental = ["deltalake-core/unity-experimental"]

+# used only for integration testing
+integration_test = ["deltalake-core/integration_test"]
+
 [dev-dependencies]
 tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
 chrono = { workspace = true, default-features = false, features = ["clock"] }
-log = "*"
+tracing = { workspace = true }

 [[example]]
 name = "basic_operations"
diff --git a/crates/deltalake/examples/recordbatch-writer.rs b/crates/deltalake/examples/recordbatch-writer.rs
index 67aac3b962..874b4a10f7 100644
--- a/crates/deltalake/examples/recordbatch-writer.rs
+++ b/crates/deltalake/examples/recordbatch-writer.rs
@@ -18,8 +18,8 @@ use deltalake::parquet::{
 use deltalake::writer::{DeltaWriter, RecordBatchWriter};
 use deltalake::Path;
 use deltalake::*;
-use log::*;
 use std::sync::Arc;
+use tracing::*;

 /*
  * The main function gets everything started, but does not contain any meaningful
diff --git a/delta-inspect/Cargo.toml b/delta-inspect/Cargo.toml
index cc6efaa381..78d2477486 100644
--- a/delta-inspect/Cargo.toml
+++ b/delta-inspect/Cargo.toml
@@ -22,5 +22,5 @@ features = ["azure", "gcs"]

 [features]
 default = ["rustls"]
-native-tls = ["deltalake/s3-native-tls", "deltalake/glue-native-tls"]
+native-tls = ["deltalake/s3-native-tls", "deltalake/glue"]
 rustls = ["deltalake/s3", "deltalake/glue"]
diff --git a/python/Cargo.toml b/python/Cargo.toml
index dd3bcca1e9..558af93b12 100644
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -49,5 +49,5 @@ features = ["azure", "gcs", "python", "datafusion", "unity-experimental"]

 [features]
 default = ["rustls"]
-native-tls = ["deltalake/s3-native-tls", "deltalake/glue-native-tls"]
+native-tls = ["deltalake/s3-native-tls", "deltalake/glue"]
 rustls = ["deltalake/s3", "deltalake/glue"]
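
Note (outside the diff): the recurring change above swaps the `log` crate's macros for `tracing`, and the workspace pins `tracing = { version = "0.1", features = ["log"] }`, so `tracing` events are also forwarded as `log` records while consumers migrate. The snippet below is only a minimal sketch of that call-site pattern, not code from this patch; `flush_if_needed` is a made-up function, and `tracing_subscriber` is an assumed extra dev-dependency used here purely to make the events visible.

    // Same macro names and call syntax as log::debug!/log::warn!,
    // which is why the patch can replace `use log::*` almost mechanically.
    use tracing::{debug, warn};

    fn flush_if_needed(buffer_len: usize, target_file_size: usize) -> bool {
        if buffer_len >= target_file_size {
            // Mirrors the PartitionWriter message touched in the patch.
            debug!("Writing file with size {:?} to disk.", buffer_len);
            true
        } else {
            warn!("buffer at {} bytes, below target of {}", buffer_len, target_file_size);
            false
        }
    }

    fn main() {
        // Assumption: a tracing subscriber is installed by the binary;
        // without one, the "log" feature still emits log records.
        tracing_subscriber::fmt::init();
        flush_if_needed(2048, 1024);
    }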
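
Note (outside the diff): the other repeated change replaces the unmaintained `tempdir` crate with `tempfile`, whose constructors take no name prefix and whose guards delete the directory on drop. A rough equivalence sketch, assuming `tempfile = "3"` as in the patch (the `Builder` line is an option the patch does not use):

    use tempfile::{tempdir, Builder, TempDir};

    fn main() -> std::io::Result<()> {
        // tempdir::TempDir::new("test")        ->  tempfile::tempdir()
        let tmp_dir: TempDir = tempdir()?;

        // tempdir::TempDir::new_in(".", "tmp_") ->  tempfile::TempDir::new_in(".")
        let in_cwd = TempDir::new_in(".")?;

        // If a literal prefix still matters, Builder can provide it.
        let prefixed = Builder::new().prefix("tmp_").tempdir_in(".")?;

        println!("{:?} {:?} {:?}", tmp_dir.path(), in_cwd.path(), prefixed.path());
        Ok(())
        // all three directories are removed when their guards drop here
    }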