42
42
43
43
from abc import ABC , abstractmethod
44
44
from collections .abc import Sequence
45
- from daft .daft import catalog as native_catalog
45
+ from daft .daft import PyTableSource , catalog as native_catalog
46
46
from daft .daft import PyIdentifier
47
47
from daft .logical .builder import LogicalPlanBuilder
48
48
49
49
from daft .dataframe import DataFrame
50
50
51
51
from typing import TYPE_CHECKING
52
52
53
+ from daft .logical .schema import Schema
54
+
53
55
if TYPE_CHECKING :
54
56
from daft .dataframe .dataframe import ColumnInputType
55
57
@@ -142,9 +144,12 @@ def register_python_catalog(catalog: object, name: str | None = None) -> str:
142
144
class Catalog (ABC ):
143
145
"""Interface for python catalog implementations."""
144
146
145
- @property
146
- def inner (self ) -> object | None :
147
- """Returns the inner catalog object if this is an adapter."""
147
@staticmethod
def from_pydict(tables: dict[str, Table]) -> Catalog:
    """Creates an in-memory catalog from a dictionary of tables.

    Args:
        tables: dictionary with ``str`` keys and ``Table`` values; it is
            handed unchanged to ``MemoryCatalog``.

    Returns:
        The ``MemoryCatalog`` built from ``tables``.
    """
    # MemoryCatalog is imported at call time rather than at module import time.
    from daft.catalog.__memory import MemoryCatalog

    catalog = MemoryCatalog(tables)
    return catalog
148
153
149
154
@staticmethod
150
155
def from_iceberg (obj : object ) -> Catalog :
@@ -192,7 +197,7 @@ def list_tables(self, pattern: str | None = None) -> list[str]: ...
192
197
###
193
198
194
199
@abstractmethod
def get_table(self, name: str | Identifier) -> Table:
    """Returns the table referenced by a string name or an Identifier.

    Abstract: concrete catalogs supply the lookup.
    """
196
201
197
202
# TODO deprecated catalog APIs #3819
198
203
def load_table (self , name : str ) -> Table :
@@ -227,6 +232,12 @@ def __init__(self, *parts: str):
227
232
raise ValueError ("Identifier requires at least one part." )
228
233
self ._identifier = PyIdentifier (parts [:- 1 ], parts [- 1 ])
229
234
235
@staticmethod
def _from_pyidentifier(identifier: PyIdentifier) -> Identifier:
    """Wraps an existing PyIdentifier in an Identifier without running __init__."""
    # __new__ allocates the instance without going through __init__.
    wrapper = Identifier.__new__(Identifier)
    wrapper._identifier = identifier
    return wrapper
240
+
230
241
@staticmethod
231
242
def from_sql (input : str , normalize : bool = False ) -> Identifier :
232
243
"""Parses an Identifier from an SQL string, normalizing to lowercase if specified.
@@ -243,6 +254,11 @@ def from_sql(input: str, normalize: bool = False) -> Identifier:
243
254
i ._identifier = PyIdentifier .from_sql (input , normalize )
244
255
return i
245
256
257
@staticmethod
def from_str(input: str) -> Identifier:
    """Builds an Identifier from a dot-delimited Python string.

    The string is split on ``.`` and every piece is passed as one part to
    ``Identifier``; the pieces are used as written (no normalization).
    """
    parts = input.split(".")
    return Identifier(*parts)
261
+
246
262
def __eq__ (self , other : object ) -> bool :
247
263
if not isinstance (other , Identifier ):
248
264
return False
@@ -260,13 +276,60 @@ def __len__(self) -> int:
260
276
def __repr__(self) -> str:
    """Returns ``Identifier('...')`` wrapping the repr of the inner identifier."""
    inner_repr = self._identifier.__repr__()
    return f"Identifier('{inner_repr}')"
262
278
279
def __str__(self) -> str:
    """Returns the items yielded by iterating this Identifier, joined with dots."""
    separator = "."
    return separator.join(self)
281
+
263
282
264
283
class Table (ABC ):
265
284
"""Interface for python table implementations."""
266
285
267
- @property
268
- def inner (self ) -> object | None :
269
- """Returns the inner table object if this is an adapter."""
286
@staticmethod
def from_df(name: str, dataframe: DataFrame) -> Table:
    """Creates a read-only table backed by a DataFrame.

    Args:
        name: passed to ``MemoryTable`` as its first argument.
        dataframe: the DataFrame backing the table.

    Returns:
        The ``MemoryTable`` built from ``name`` and ``dataframe``.
    """
    # MemoryTable is imported at call time rather than at module import time.
    from daft.catalog.__memory import MemoryTable

    table = MemoryTable(name, dataframe)
    return table
292
+
293
@staticmethod
def from_iceberg(obj: object) -> Table:
    """Returns a Daft Table instance from an Iceberg table.

    Args:
        obj: the object handed to ``IcebergTable._from_obj``.

    Returns:
        Whatever ``IcebergTable._from_obj(obj)`` produces.

    Raises:
        ImportError: if an ImportError occurs while importing the Iceberg
            adapter or building the table. The original error is attached
            as ``__cause__`` so the missing module stays visible.
    """
    try:
        from daft.catalog.__iceberg import IcebergTable

        return IcebergTable._from_obj(obj)
    except ImportError as err:
        # Chain explicitly: the underlying failure is the direct cause of this
        # friendlier message, not an unrelated error raised during handling.
        raise ImportError("Iceberg support not installed: pip install -U 'getdaft[iceberg]'") from err
302
+
303
@staticmethod
def from_unity(obj: object) -> Table:
    """Returns a Daft Table instance from a Unity table.

    Args:
        obj: the object handed to ``UnityTable._from_obj``.

    Returns:
        Whatever ``UnityTable._from_obj(obj)`` produces.

    Raises:
        ImportError: if an ImportError occurs while importing the Unity
            adapter or building the table. The original error is attached
            as ``__cause__`` so the missing module stays visible.
    """
    try:
        from daft.catalog.__unity import UnityTable

        return UnityTable._from_obj(obj)
    except ImportError as err:
        # Chain explicitly: the underlying failure is the direct cause of this
        # friendlier message, not an unrelated error raised during handling.
        raise ImportError("Unity support not installed: pip install -U 'getdaft[unity]'") from err
312
+
313
@staticmethod
def _from_obj(obj: object) -> Table:
    """Rejects the object: this base implementation supports no table types.

    Raises:
        ValueError: always, naming the type of ``obj``.
    """
    unsupported = type(obj)
    raise ValueError(f"Unsupported table type: {unsupported}")
317
+
318
+ # TODO catalog APIs part 3
319
+ # @property
320
+ # @abstractmethod
321
+ # def name(self) -> str:
322
+ # """Returns the table name."""
323
+
324
+ # TODO catalog APIs part 3
325
+ # @property
326
+ # @abstractmethod
327
+ # def inner(self) -> object | None:
328
+ # """Returns the inner table object if this is an adapter."""
329
+
330
@abstractmethod
def read(self) -> DataFrame:
    """Returns this table as a DataFrame.

    Abstract: concrete tables supply the implementation.
    """
270
333
271
334
# TODO deprecated catalog APIs #3819
272
335
def to_dataframe (self ) -> DataFrame :
@@ -277,14 +340,62 @@ def to_dataframe(self) -> DataFrame:
277
340
)
278
341
return self .read ()
279
342
280
- @abstractmethod
281
- def read (self ) -> DataFrame :
282
- """Returns a DataFrame from this table."""
283
-
284
343
def select(self, *columns: ColumnInputType) -> DataFrame:
    """Reads this table and returns a DataFrame restricted to the selected columns.

    Args:
        *columns: forwarded unchanged to ``DataFrame.select``.
    """
    frame = self.read()
    return frame.select(*columns)
287
346
288
347
def show(self, n: int = 8) -> None:
    """Reads this table and shows its first ``n`` rows (8 by default)."""
    frame = self.read()
    return frame.show(n)
350
+
351
+
352
class TableSource:
    """A TableSource is used to create a new table; this could be a Schema or DataFrame.

    Instances wrap a native ``PyTableSource`` in ``_source`` and are obtained
    through the factory methods (``from_df`` / ``_from_obj``); calling the
    constructor directly always raises.
    """

    # Native table source; set by the factory methods.
    _source: PyTableSource

    def __init__(self) -> None:
        """Always raises; use a factory method instead.

        Raises:
            ValueError: unconditionally.
        """
        raise ValueError("We do not support creating a TableSource via __init__")

    @staticmethod
    def from_df(df: DataFrame) -> TableSource:
        """Returns a TableSource built from the DataFrame's underlying plan builder."""
        # __new__ bypasses __init__, which deliberately raises.
        source = TableSource.__new__(TableSource)
        source._source = PyTableSource.from_builder(df._builder._builder)
        return source

    @staticmethod
    def _from_obj(obj: object = None) -> TableSource:
        """Returns a TableSource for a supported object.

        Dispatches on the argument: ``None``, a ``DataFrame``, a ``str``
        (treated as a path), or a ``Schema``.

        Raises:
            ValueError: if ``obj`` is none of the supported kinds.
            NotImplementedError: if ``obj`` is a ``str`` (paths are not supported yet).
        """
        # TODO for future sources, consider https://github.com/Eventual-Inc/Daft/pull/2864
        if obj is None:
            return TableSource._from_none()
        if isinstance(obj, DataFrame):
            return TableSource.from_df(obj)
        if isinstance(obj, str):
            return TableSource._from_path(obj)
        if isinstance(obj, Schema):
            return TableSource._from_schema(obj)
        # ValueError (rather than a bare Exception) matches Table._from_obj and
        # __init__, and is still caught by callers handling Exception.
        raise ValueError(f"Unknown table source: {obj}")

    @staticmethod
    def _from_none() -> TableSource:
        """Returns the TableSource used when no source object is given."""
        # for creating temp mutable tables, but we don't have those yet
        # s = TableSource.__new__(TableSource)
        # s._source = PyTableSource.empty()
        # return s
        # todo temp workaround just use an empty schema
        return TableSource._from_schema(Schema._from_fields([]))

    @staticmethod
    def _from_schema(schema: Schema) -> TableSource:
        """Returns a TableSource for a schema-only table.

        NOTE: as a temporary workaround the ``schema`` argument is currently
        not used; the source is built from an empty DataFrame instead.
        """
        # we don't have mutable temp tables, so just make an empty view
        # s = TableSource.__new__(TableSource)
        # s._source = PyTableSource.from_schema(schema._schema)
        # return s
        # todo temp workaround until create_table is wired
        return TableSource.from_df(DataFrame._from_pylist([]))

    @staticmethod
    def _from_path(path: str) -> TableSource:
        """Placeholder for path-based sources.

        Raises:
            NotImplementedError: always; path sources are not supported yet.
        """
        # for supporting daft.create_table("t", "/path/to/data") <-> CREATE TABLE t AS '/path/to/my.data'
        raise NotImplementedError("creating a table source from a path is not yet supported.")
0 commit comments