From 584d31e70923a84d3b1b5e3a2e71bce9f2ed539d Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Thu, 28 Sep 2023 12:36:55 -0700 Subject: [PATCH] use temp dir instead of cache --- Cargo.lock | 28 ------------------- Cargo.toml | 1 - crates/sparrow-runtime/Cargo.toml | 1 - .../sparrow-runtime/src/prepare/preparer.rs | 28 +++++++++---------- python/Cargo.lock | 28 ------------------- 5 files changed, 14 insertions(+), 72 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2025bb210..98b1b48c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1432,15 +1432,6 @@ dependencies = [ "crypto-common", ] -[[package]] -name = "dirs" -version = "5.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" -dependencies = [ - "dirs-sys", -] - [[package]] name = "dirs-next" version = "2.0.0" @@ -1451,18 +1442,6 @@ dependencies = [ "dirs-sys-next", ] -[[package]] -name = "dirs-sys" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" -dependencies = [ - "libc", - "option-ext", - "redox_users", - "windows-sys 0.48.0", -] - [[package]] name = "dirs-sys-next" version = "0.1.2" @@ -3020,12 +2999,6 @@ dependencies = [ "tokio-stream", ] -[[package]] -name = "option-ext" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" - [[package]] name = "ordered-float" version = "2.10.0" @@ -4766,7 +4739,6 @@ dependencies = [ "dashmap", "data-encoding", "derive_more", - "dirs", "enum-map", "erased-serde", "error-stack", diff --git a/Cargo.toml b/Cargo.toml index d88f0b7ef..baed629a3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,7 +48,6 @@ dashmap = "5.4.0" data-encoding = "2.3.3" decorum = "0.3.1" derive_more = "0.99.17" -dirs = "5.0.1" edit-distance = "2.1.0" egg = "0.9.3" enum-as-inner = "0.6.0" diff --git a/crates/sparrow-runtime/Cargo.toml b/crates/sparrow-runtime/Cargo.toml index ac76d1dbf..fee2c4a66 100644 --- a/crates/sparrow-runtime/Cargo.toml +++ b/crates/sparrow-runtime/Cargo.toml @@ -34,7 +34,6 @@ clap.workspace = true dashmap.workspace = true data-encoding.workspace = true derive_more.workspace = true -dirs.workspace = true enum-map.workspace = true erased-serde.workspace = true error-stack.workspace = true diff --git a/crates/sparrow-runtime/src/prepare/preparer.rs b/crates/sparrow-runtime/src/prepare/preparer.rs index da58c0d96..41408a7dc 100644 --- a/crates/sparrow-runtime/src/prepare/preparer.rs +++ b/crates/sparrow-runtime/src/prepare/preparer.rs @@ -18,7 +18,10 @@ use crate::PreparedMetadata; use super::{prepared_batches, write_parquet}; -const KASKADA_PATH: &str = ".cache/kaskada"; +/// For now, this is a temporary location for the prepared files. +/// In the future, we'll want to move this path to a managed cache +/// so we can reuse state. +const KASKADA_PATH: &str = "kaskada"; const PREPARED_FILE_PREFIX: &str = "part"; #[derive(derive_more::Display, Debug)] @@ -96,7 +99,7 @@ impl Preparer { ) -> error_stack::Result, Error> { // TODO: Support Slicing - let output_path_prefix = self.prepared_output_prefix()?; + let output_path_prefix = self.prepared_output_prefix(); let output_url = ObjectStoreUrl::from_str(&output_path_prefix) .change_context_lazy(|| Error::InvalidUrl(output_path_prefix))?; @@ -178,19 +181,16 @@ impl Preparer { ) } // Prepared files are stored in the following format: - // file://///tables//prepared//part-.parquet - pub fn prepared_output_prefix(&self) -> error_stack::Result { + // file://///tables//prepared//part-.parquet + pub fn prepared_output_prefix(&self) -> String { let uuid = Uuid::new_v4(); - let home_dir = dirs::home_dir(); - if let Some(home_dir) = home_dir.map(|p| p.display().to_string()) { - Ok(format!( - "file:///{}/{}/tables/{}/prepare/{uuid}/", - home_dir, KASKADA_PATH, self.table_config.uuid - )) - } else { - tracing::error!("Failed to get home directory"); - error_stack::bail!(Error::Internal) - } + let temp_dir = tempfile::tempdir().expect("failed to create temp dir"); + format!( + "file:///{}/{}/tables/{}/prepare/{uuid}/", + temp_dir.path().display(), + KASKADA_PATH, + self.table_config.uuid + ) } } diff --git a/python/Cargo.lock b/python/Cargo.lock index 1ae66e179..1a17ce41a 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -1224,15 +1224,6 @@ dependencies = [ "crypto-common", ] -[[package]] -name = "dirs" -version = "5.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" -dependencies = [ - "dirs-sys", -] - [[package]] name = "dirs-next" version = "2.0.0" @@ -1243,18 +1234,6 @@ dependencies = [ "dirs-sys-next", ] -[[package]] -name = "dirs-sys" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" -dependencies = [ - "libc", - "option-ext", - "redox_users", - "windows-sys", -] - [[package]] name = "dirs-sys-next" version = "0.1.2" @@ -2581,12 +2560,6 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "option-ext" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" - [[package]] name = "ordered-float" version = "2.10.0" @@ -3951,7 +3924,6 @@ dependencies = [ "dashmap", "data-encoding", "derive_more", - "dirs", "enum-map", "erased-serde", "error-stack",