Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit af87a4f

Browse files
committedFeb 19, 2025
Ports tests and removes deprecated APIs
1 parent c47b29e commit af87a4f

File tree

14 files changed

+97
-566
lines changed

14 files changed

+97
-566
lines changed
 

‎Cargo.lock

+1-12
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎Cargo.toml

+4-8
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ common-tracing = {path = "src/common/tracing", default-features = false}
1212
common-version = {path = "src/common/version", default-features = false}
1313
daft-algebra = {path = "src/daft-algebra", default-features = false}
1414
daft-catalog = {path = "src/daft-catalog", default-features = false}
15-
daft-catalog-python-catalog = {path = "src/daft-catalog/python-catalog", optional = true}
1615
daft-compression = {path = "src/daft-compression", default-features = false}
1716
daft-connect = {path = "src/daft-connect", optional = true}
1817
daft-context = {path = "src/daft-context", default-features = false}
@@ -50,15 +49,17 @@ sysinfo = {workspace = true}
5049
[features]
5150
# maturin will turn this on
5251
python = [
52+
"dep:pyo3",
53+
"dep:pyo3-log",
5354
"common-daft-config/python",
5455
"common-display/python",
5556
"common-partitioning/python",
5657
"common-resource-request/python",
5758
"common-file-formats/python",
5859
"common-scan-info/python",
5960
"common-system-info/python",
60-
"daft-catalog-python-catalog/python",
6161
"daft-catalog/python",
62+
"daft-connect/python",
6263
"daft-context/python",
6364
"daft-core/python",
6465
"daft-csv/python",
@@ -80,12 +81,7 @@ python = [
8081
"daft-session/python",
8182
"daft-stats/python",
8283
"daft-recordbatch/python",
83-
"daft-writers/python",
84-
"dep:daft-catalog-python-catalog",
85-
"dep:daft-connect",
86-
"daft-connect/python",
87-
"dep:pyo3",
88-
"dep:pyo3-log"
84+
"daft-writers/python"
8985
]
9086

9187
[lib]

‎src/daft-catalog/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
common-error = {path = "../common/error", default-features = false}
33
daft-core = {path = "../daft-core", default-features = false}
44
daft-logical-plan = {path = "../daft-logical-plan", default-features = false}
5-
lazy_static = {workspace = true}
65
pyo3 = {workspace = true, optional = true}
76
sqlparser = {workspace = true}
87
snafu.workspace = true

‎src/daft-catalog/python-catalog/Cargo.toml

-14
This file was deleted.

‎src/daft-catalog/python-catalog/src/lib.rs

-2
This file was deleted.

‎src/daft-catalog/python-catalog/src/python.rs

-172
This file was deleted.

‎src/daft-catalog/src/data_catalog.rs

-13
This file was deleted.

‎src/daft-catalog/src/data_catalog_table.rs

-11
This file was deleted.

‎src/daft-catalog/src/error.rs

+9-5
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,8 @@ pub enum Error {
3232
Unsupported { message: String },
3333

3434
#[cfg(feature = "python")]
35-
#[snafu(display("Python error during {}: {}", context, source))]
36-
PythonError {
37-
source: pyo3::PyErr,
38-
context: String,
39-
},
35+
#[snafu(display("Python error: {}", source))]
36+
PythonError { source: pyo3::PyErr },
4037
}
4138

4239
impl Error {
@@ -81,3 +78,10 @@ impl From<Error> for PyErr {
8178
daft_error.into()
8279
}
8380
}
81+
82+
#[cfg(feature = "python")]
83+
impl From<PyErr> for Error {
84+
fn from(value: PyErr) -> Self {
85+
Error::PythonError { source: value }
86+
}
87+
}

‎src/daft-catalog/src/lib.rs

-224
Original file line numberDiff line numberDiff line change
@@ -16,227 +16,3 @@ pub use python::register_modules;
1616

1717
// TODO audit daft-catalog and daft-session errors.
1818
pub mod error;
19-
20-
// ----------------------------------
21-
// TODO deprecated catalog APIs #3819
22-
// ----------------------------------
23-
24-
mod data_catalog;
25-
mod data_catalog_table;
26-
27-
// Export public-facing traits
28-
use std::{collections::HashMap, default, sync::Arc};
29-
30-
use daft_logical_plan::LogicalPlanBuilder;
31-
pub use data_catalog::DataCatalog;
32-
pub use data_catalog_table::DataCatalogTable;
33-
use error::{Error, Result};
34-
35-
pub mod global_catalog {
36-
use std::sync::{Arc, RwLock};
37-
38-
use lazy_static::lazy_static;
39-
40-
use crate::{DaftCatalog, DataCatalog};
41-
42-
lazy_static! {
43-
pub(crate) static ref GLOBAL_DAFT_META_CATALOG: RwLock<DaftCatalog> =
44-
RwLock::new(DaftCatalog::new_from_env());
45-
}
46-
47-
/// Register a DataCatalog with the global DaftMetaCatalog
48-
pub fn register_catalog(catalog: Arc<dyn DataCatalog>, name: Option<&str>) {
49-
GLOBAL_DAFT_META_CATALOG
50-
.write()
51-
.unwrap()
52-
.register_catalog(catalog, name);
53-
}
54-
55-
/// Unregisters a catalog with the global DaftMetaCatalog
56-
pub fn unregister_catalog(name: Option<&str>) -> bool {
57-
GLOBAL_DAFT_META_CATALOG
58-
.write()
59-
.unwrap()
60-
.unregister_catalog(name)
61-
}
62-
}
63-
64-
/// Name of the default catalog
65-
static DEFAULT_CATALOG_NAME: &str = "default";
66-
67-
/// The [`DaftMetaCatalog`] is a catalog of [`DataCatalog`] implementations
68-
///
69-
/// Users of Daft can register various [`DataCatalog`] with Daft, enabling
70-
/// discovery of tables across various [`DataCatalog`] implementations.
71-
#[derive(Debug, Clone, Default)]
72-
pub struct DaftCatalog {
73-
/// Map of catalog names to the DataCatalog impls.
74-
///
75-
/// NOTE: The default catalog is always named "default"
76-
data_catalogs: HashMap<String, Arc<dyn DataCatalog>>,
77-
78-
/// LogicalPlans that were "named" and registered with Daft
79-
named_tables: HashMap<String, LogicalPlanBuilder>,
80-
}
81-
82-
impl DaftCatalog {
83-
/// Create a `DaftMetaCatalog` from the current environment
84-
pub fn new_from_env() -> Self {
85-
// TODO: Parse a YAML file to produce the catalog
86-
DaftCatalog {
87-
data_catalogs: default::Default::default(),
88-
named_tables: default::Default::default(),
89-
}
90-
}
91-
92-
/// Register a new [`DataCatalog`] with the `DaftMetaCatalog`.
93-
///
94-
/// # Arguments
95-
///
96-
/// * `catalog` - The [`DataCatalog`] to register.
97-
pub fn register_catalog(&mut self, catalog: Arc<dyn DataCatalog>, name: Option<&str>) {
98-
let name = name.unwrap_or(DEFAULT_CATALOG_NAME);
99-
self.data_catalogs.insert(name.to_string(), catalog);
100-
}
101-
102-
/// Unregister a [`DataCatalog`] from the `DaftMetaCatalog`.
103-
///
104-
/// # Arguments
105-
///
106-
/// * `name` - The name of the catalog to unregister. If None, the default catalog will be unregistered.
107-
///
108-
/// # Returns
109-
///
110-
/// Returns `true` if a catalog was successfully unregistered, `false` otherwise.
111-
pub fn unregister_catalog(&mut self, name: Option<&str>) -> bool {
112-
let name = name.unwrap_or(DEFAULT_CATALOG_NAME);
113-
self.data_catalogs.remove(name).is_some()
114-
}
115-
116-
/// Registers a LogicalPlan with a name in the DaftMetaCatalog
117-
pub fn register_table(
118-
&mut self,
119-
name: &str,
120-
view: impl Into<LogicalPlanBuilder>,
121-
) -> Result<()> {
122-
// TODO this API is being removed, for now preserve the exact name as if it were delimited.
123-
self.named_tables.insert(name.into(), view.into());
124-
Ok(())
125-
}
126-
127-
/// Check if a named table is registered in the DaftCatalog
128-
pub fn contains_table(&self, name: &str) -> bool {
129-
self.named_tables.contains_key(name)
130-
}
131-
132-
/// Provides high-level functionality for reading a table of data against a [`DaftMetaCatalog`]
133-
///
134-
/// Resolves the provided table_identifier against the catalog:
135-
///
136-
/// 1. If there is an exact match for the provided `table_identifier` in the catalog's registered named tables, immediately return the exact match
137-
/// 2. If the [`DaftMetaCatalog`] has a default catalog, we will attempt to resolve the `table_identifier` against the default catalog
138-
/// 3. If the `table_identifier` is hierarchical (delimited by "."), use the first component as the Data Catalog name and resolve the rest of the components against
139-
/// the selected Data Catalog
140-
pub fn read_table(&self, table_identifier: &str) -> error::Result<LogicalPlanBuilder> {
141-
// If the name is an exact match with a registered view, return it.
142-
if let Some(view) = self.named_tables.get(table_identifier) {
143-
return Ok(view.clone());
144-
}
145-
146-
let mut searched_catalog_name = "default";
147-
let mut searched_table_name = table_identifier;
148-
149-
// Check the default catalog for a match
150-
if let Some(default_data_catalog) = self.data_catalogs.get(DEFAULT_CATALOG_NAME) {
151-
if let Some(tbl) = default_data_catalog.get_table(table_identifier)? {
152-
return tbl.as_ref().to_logical_plan_builder();
153-
}
154-
}
155-
156-
// Try to parse the catalog name from the provided table identifier by taking the first segment, split by '.'
157-
if let Some((catalog_name, table_name)) = table_identifier.split_once('.') {
158-
if let Some(data_catalog) = self.data_catalogs.get(catalog_name) {
159-
searched_catalog_name = catalog_name;
160-
searched_table_name = table_name;
161-
if let Some(tbl) = data_catalog.get_table(table_name)? {
162-
return tbl.as_ref().to_logical_plan_builder();
163-
}
164-
}
165-
}
166-
167-
// Return the error containing the last catalog/table pairing that we attempted to search on
168-
Err(Error::TableNotFound {
169-
catalog_name: searched_catalog_name.to_string(),
170-
table_id: searched_table_name.to_string(),
171-
})
172-
}
173-
174-
/// Copy from another catalog, using tables from other in case of conflict
175-
pub fn copy_from(&mut self, other: &Self) {
176-
for (name, plan) in &other.named_tables {
177-
self.named_tables.insert(name.clone(), plan.clone());
178-
}
179-
for (name, catalog) in &other.data_catalogs {
180-
self.data_catalogs.insert(name.clone(), catalog.clone());
181-
}
182-
}
183-
184-
/// TODO remove py register and read methods are moved to session
185-
/// I cannot remove DaftMetaCatalog until I invert the dependency
186-
/// so that the current register_ methods use the session rather than the catalog.
187-
pub fn into_catalog_map(self) -> HashMap<String, Arc<dyn DataCatalog>> {
188-
self.data_catalogs
189-
}
190-
}
191-
192-
#[cfg(test)]
193-
mod tests {
194-
use std::sync::Arc;
195-
196-
use daft_core::prelude::*;
197-
use daft_logical_plan::{
198-
ops::Source, source_info::PlaceHolderInfo, ClusteringSpec, LogicalPlan, LogicalPlanRef,
199-
SourceInfo,
200-
};
201-
202-
use super::*;
203-
204-
fn mock_plan() -> LogicalPlanRef {
205-
let schema = Arc::new(
206-
Schema::new(vec![
207-
Field::new("text", DataType::Utf8),
208-
Field::new("id", DataType::Int32),
209-
])
210-
.unwrap(),
211-
);
212-
LogicalPlan::Source(Source::new(
213-
schema.clone(),
214-
Arc::new(SourceInfo::PlaceHolder(PlaceHolderInfo {
215-
source_schema: schema,
216-
clustering_spec: Arc::new(ClusteringSpec::unknown()),
217-
source_id: 0,
218-
})),
219-
))
220-
.arced()
221-
}
222-
223-
#[test]
224-
fn test_register_and_unregister_named_table() {
225-
let mut catalog = DaftCatalog::new_from_env();
226-
let plan = LogicalPlanBuilder::from(mock_plan());
227-
228-
// Register a table
229-
assert!(catalog.register_table("test_table", plan.clone()).is_ok());
230-
}
231-
232-
#[test]
233-
fn test_read_registered_table() {
234-
let mut catalog = DaftCatalog::new_from_env();
235-
let plan = LogicalPlanBuilder::from(mock_plan());
236-
237-
catalog.register_table("test_table", plan).unwrap();
238-
239-
assert!(catalog.read_table("test_table").is_ok());
240-
assert!(catalog.read_table("non_existent_table").is_err());
241-
}
242-
}

‎src/daft-catalog/src/python.rs

+16-104
Original file line numberDiff line numberDiff line change
@@ -4,100 +4,7 @@ use daft_core::prelude::SchemaRef;
44
use daft_logical_plan::{LogicalPlanRef, PyLogicalPlanBuilder};
55
use pyo3::{exceptions::PyIndexError, intern, prelude::*};
66

7-
use crate::{
8-
error::Result, global_catalog, Catalog, CatalogRef, Identifier, Table, TableRef, View,
9-
};
10-
11-
/// Read a table from the specified `DaftMetaCatalog`.
12-
///
13-
/// TODO deprecated catalog APIs #3819
14-
///
15-
/// This function reads a table from a `DaftMetaCatalog` and returns a PyLogicalPlanBuilder
16-
/// object representing the plan required to read the table.
17-
///
18-
/// The provided `table_identifier` can be:
19-
///
20-
/// 1. Name of a registered dataframe/SQL view (manually registered using `DaftMetaCatalog.register_view`)
21-
/// 2. Name of a table within the default catalog (without inputting the catalog name) for example: `"my.table.name"`
22-
/// 3. Name of a fully-qualified table path with the catalog name for example: `"my_catalog.my.table.name"`
23-
///
24-
/// Args:
25-
/// table_identifier (str): The identifier of the table to read.
26-
///
27-
/// Returns:
28-
/// PyLogicalPlanBuilder: A PyLogicalPlanBuilder object representing the table's data.
29-
///
30-
/// Raises:
31-
/// DaftError: If the table cannot be read or the specified table identifier is not found.
32-
///
33-
/// Example:
34-
/// >>> import daft
35-
/// >>> df = daft.read_table("foo")
36-
#[pyfunction]
37-
#[pyo3(name = "read_table")]
38-
fn py_read_table(table_identifier: &str) -> PyResult<PyLogicalPlanBuilder> {
39-
let logical_plan_builder = global_catalog::GLOBAL_DAFT_META_CATALOG
40-
.read()
41-
.unwrap()
42-
.read_table(table_identifier)?;
43-
Ok(PyLogicalPlanBuilder::new(logical_plan_builder))
44-
}
45-
46-
/// Register a table with the global catalog.
47-
///
48-
/// TODO deprecated catalog APIs #3819
49-
///
50-
/// This function registers a table with the global `DaftMetaCatalog` using the provided
51-
/// table identifier and logical plan.
52-
///
53-
/// Args:
54-
/// table_identifier (str): The identifier to use for the registered table.
55-
/// logical_plan (PyLogicalPlanBuilder): The logical plan representing the table's data.
56-
///
57-
/// Returns:
58-
/// str: The table identifier used for registration.
59-
///
60-
/// Example:
61-
/// >>> import daft
62-
/// >>> df = daft.read_csv("data.csv")
63-
/// >>> daft.register_table("my_table", df)
64-
#[pyfunction]
65-
#[pyo3(name = "register_table")]
66-
fn py_register_table(
67-
table_identifier: &str,
68-
logical_plan: &PyLogicalPlanBuilder,
69-
) -> PyResult<String> {
70-
global_catalog::GLOBAL_DAFT_META_CATALOG
71-
.write()
72-
.unwrap()
73-
.register_table(table_identifier, logical_plan.builder.clone())?;
74-
Ok(table_identifier.to_string())
75-
}
76-
77-
/// Unregisters a catalog from the Daft catalog system
78-
///
79-
/// TODO deprecated catalog APIs #3819
80-
///
81-
/// This function removes a previously registered catalog from the Daft catalog system.
82-
///
83-
/// Args:
84-
/// catalog_name (Optional[str]): The name of the catalog to unregister. If None, the default catalog will be unregistered.
85-
///
86-
/// Returns:
87-
/// bool: True if a catalog was successfully unregistered, False otherwise.
88-
///
89-
/// Example:
90-
/// >>> import daft
91-
/// >>> daft.unregister_catalog("my_catalog")
92-
/// True
93-
#[pyfunction]
94-
#[pyo3(
95-
name = "unregister_catalog",
96-
signature = (catalog_name=None)
97-
)]
98-
pub fn py_unregister_catalog(catalog_name: Option<&str>) -> bool {
99-
crate::global_catalog::unregister_catalog(catalog_name)
100-
}
7+
use crate::{error::Result, Catalog, CatalogRef, Identifier, Table, TableRef, View};
1018

1029
/// PyCatalog implements the Catalog ABC for some Catalog trait impl (rust->py).
10310
#[pyclass]
@@ -265,27 +172,32 @@ impl PyTableWrapper {
265172

266173
impl Table for PyTableWrapper {
267174
fn get_schema(&self) -> SchemaRef {
268-
todo!()
175+
todo!("Table.schema not yet defined")
269176
}
270177

271178
fn get_logical_plan(&self) -> Result<LogicalPlanRef> {
272-
todo!()
179+
Python::with_gil(|py| {
180+
let builder = self
181+
.0
182+
.bind(py)
183+
.call_method0("read")?
184+
.getattr("_builder")?
185+
.getattr("_builder")?;
186+
let builder = builder
187+
.downcast::<PyLogicalPlanBuilder>()
188+
.expect("downcast PyAny to PyLogicalPlanBuilder");
189+
Ok(builder.borrow().builder.plan.clone())
190+
})
273191
}
274192

275193
fn to_py(&self, py: Python<'_>) -> PyResult<PyObject> {
276194
self.0.extract(py)
277195
}
278196
}
279197

280-
pub fn register_modules<'py>(parent: &Bound<'py, PyModule>) -> PyResult<Bound<'py, PyModule>> {
198+
pub fn register_modules(parent: &Bound<'_, PyModule>) -> PyResult<()> {
281199
parent.add_class::<PyCatalog>()?;
282200
parent.add_class::<PyIdentifier>()?;
283201
parent.add_class::<PyTable>()?;
284-
// TODO deprecated catalog APIs #3819
285-
let module = PyModule::new(parent.py(), "catalog")?;
286-
module.add_wrapped(wrap_pyfunction!(py_read_table))?;
287-
module.add_wrapped(wrap_pyfunction!(py_register_table))?;
288-
module.add_wrapped(wrap_pyfunction!(py_unregister_catalog))?;
289-
parent.add_submodule(&module)?;
290-
Ok(module)
202+
Ok(())
291203
}

‎src/daft-catalog/src/table.rs

+6
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,12 @@ impl From<LogicalPlanRef> for View {
6363
}
6464
}
6565

66+
impl From<LogicalPlanBuilder> for View {
67+
fn from(value: LogicalPlanBuilder) -> Self {
68+
Self(value.plan)
69+
}
70+
}
71+
6672
impl View {
6773
pub fn arced(self) -> Arc<View> {
6874
Arc::new(self)

‎src/daft-session/Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
[dependencies]
22
daft-catalog = {path = "../daft-catalog"}
3+
daft-core = {path = "../daft-core", default-features = false}
34
daft-logical-plan = {path = "../daft-logical-plan"}
45
pyo3 = {workspace = true, optional = true}
56
uuid = {version = "1.10.0", features = ["v4"]}
@@ -8,6 +9,7 @@ uuid = {version = "1.10.0", features = ["v4"]}
89
python = [
910
"dep:pyo3",
1011
"daft-catalog/python",
12+
"daft-core/python",
1113
"daft-logical-plan/python"
1214
]
1315

‎src/daft-session/src/session.rs

+59
Original file line numberDiff line numberDiff line change
@@ -153,3 +153,62 @@ impl Default for Session {
153153
Self::empty()
154154
}
155155
}
156+
157+
/// Migrated from daft-catalog DaftMetaCatalog tests
158+
#[cfg(test)]
159+
mod tests {
160+
use std::sync::Arc;
161+
162+
use daft_catalog::View;
163+
use daft_core::prelude::*;
164+
use daft_logical_plan::{
165+
ops::Source, source_info::PlaceHolderInfo, ClusteringSpec, LogicalPlan, LogicalPlanBuilder,
166+
LogicalPlanRef, SourceInfo,
167+
};
168+
169+
use super::*;
170+
171+
fn mock_plan() -> LogicalPlanRef {
172+
let schema = Arc::new(
173+
Schema::new(vec![
174+
Field::new("text", DataType::Utf8),
175+
Field::new("id", DataType::Int32),
176+
])
177+
.unwrap(),
178+
);
179+
LogicalPlan::Source(Source::new(
180+
schema.clone(),
181+
Arc::new(SourceInfo::PlaceHolder(PlaceHolderInfo {
182+
source_schema: schema,
183+
clustering_spec: Arc::new(ClusteringSpec::unknown()),
184+
source_id: 0,
185+
})),
186+
))
187+
.arced()
188+
}
189+
190+
#[test]
191+
fn test_attach_table() {
192+
let sess = Session::empty();
193+
let plan = LogicalPlanBuilder::from(mock_plan());
194+
let view = View::from(plan).arced();
195+
196+
// Register a table
197+
assert!(sess.attach_table(view, "test_table").is_ok());
198+
}
199+
200+
#[test]
201+
fn test_get_table() {
202+
let sess = Session::empty();
203+
let plan = LogicalPlanBuilder::from(mock_plan());
204+
let view = View::from(plan).arced();
205+
206+
sess.attach_table(view, "test_table")
207+
.expect("failed to attach table");
208+
209+
assert!(sess.get_table(&Identifier::simple("test_table")).is_ok());
210+
assert!(sess
211+
.get_table(&Identifier::simple("non_existent_table"))
212+
.is_err());
213+
}
214+
}

0 commit comments

Comments
 (0)
Please sign in to comment.