42
42
43
43
from abc import ABC , abstractmethod
44
44
from collections .abc import Sequence
45
- from daft .daft import catalog as native_catalog
45
+ from daft .daft import PyTableSource , catalog as native_catalog
46
46
from daft .daft import PyIdentifier , PyTable
47
47
from daft .logical .builder import LogicalPlanBuilder
48
48
49
49
from daft .dataframe import DataFrame
50
50
51
51
from typing import TYPE_CHECKING
52
52
53
+ from daft .logical .schema import Schema
54
+
53
55
if TYPE_CHECKING :
54
56
from daft .dataframe .dataframe import ColumnInputType
55
57
@@ -286,20 +288,32 @@ class Table(ABC):
286
288
"""Interface for python table implementations."""
287
289
288
290
@staticmethod
def from_df(name: str, dataframe: DataFrame) -> Table:
    """Returns a read-only table, registered under `name`, backed by the DataFrame.

    Args:
        name: Name given to the resulting table.
        dataframe: DataFrame whose contents back the table.

    Returns:
        A `MemoryTable` wrapping `dataframe`.
    """
    # Function-scope import, as in the original (presumably to avoid an
    # import cycle with the catalog package -- TODO confirm).
    from daft.catalog.__memory import MemoryTable

    table = MemoryTable(name, dataframe)
    return table
292
296
293
297
@staticmethod
294
298
def _from_obj (obj : object ) -> Table :
295
299
"""Returns a Daft Table from a supported object type or raises an error."""
296
- if isinstance (obj , DataFrame ):
297
- return Table .from_df (obj )
298
300
raise ValueError (f"Unsupported table type: { type (obj )} " )
299
301
300
- @property
301
- def inner (self ) -> object | None :
302
- """Returns the inner table object if this is an adapter."""
302
+ # TODO catalog APIs part 3
303
+ # @property
304
+ # @abstractmethod
305
+ # def name(self) -> str:
306
+ # """Returns the table name."""
307
+
308
+ # TODO catalog APIs part 3
309
+ # @property
310
+ # @abstractmethod
311
+ # def inner(self) -> object | None:
312
+ # """Returns the inner table object if this is an adapter."""
313
+
314
@abstractmethod
def read(self) -> DataFrame:
    """Returns a DataFrame from this table.

    Concrete table implementations must override this; `select`, `show`
    and `to_dataframe` all delegate to it.
    """
303
317
304
318
# TODO deprecated catalog APIs #3819
305
319
def to_dataframe (self ) -> DataFrame :
@@ -310,14 +324,62 @@ def to_dataframe(self) -> DataFrame:
310
324
)
311
325
return self .read ()
312
326
313
- @abstractmethod
314
- def read (self ) -> DataFrame :
315
- """Returns a DataFrame from this table."""
316
-
317
327
def select(self, *columns: ColumnInputType) -> DataFrame:
    """Reads this table and returns a DataFrame with only the selected columns.

    Args:
        *columns: Columns forwarded unchanged to `DataFrame.select`.
    """
    frame = self.read()
    return frame.select(*columns)
320
330
321
331
def show(self, n: int = 8) -> None:
    """Reads this table and shows its first n rows.

    Args:
        n: Number of rows to display; defaults to 8.
    """
    frame = self.read()
    return frame.show(n)
334
+
335
+
336
class TableSource:
    """A TableSource is used to create a new table; this could be a Schema or DataFrame.

    Instances cannot be built with `TableSource()`; use one of the static
    factory methods, which allocate the object with `__new__` and then set
    the wrapped native `_source`.
    """

    # Native handle for the source; assigned by the factory methods.
    _source: PyTableSource

    def __init__(self) -> None:
        """Always raises; construction goes through the factory methods.

        Raises:
            ValueError: Always.
        """
        raise ValueError("We do not support creating a TableSource via __init__")

    @staticmethod
    def from_df(df: DataFrame) -> TableSource:
        """Returns a TableSource built from the DataFrame's logical plan builder."""
        # Bypass __init__, which deliberately raises.
        s = TableSource.__new__(TableSource)
        s._source = PyTableSource.from_builder(df._builder._builder)
        return s

    @staticmethod
    def _from_obj(obj: object = None) -> TableSource:
        """Returns a TableSource from a supported object type.

        Args:
            obj: `None`, a `DataFrame`, a `str` path, or a `Schema`.

        Raises:
            NotImplementedError: If `obj` is a `str` (paths are not supported yet).
            ValueError: If `obj` is none of the supported types.
        """
        # TODO for future sources, consider https://github.com/Eventual-Inc/Daft/pull/2864
        if obj is None:
            return TableSource._from_none()
        if isinstance(obj, DataFrame):
            return TableSource.from_df(obj)
        if isinstance(obj, str):
            return TableSource._from_path(obj)
        if isinstance(obj, Schema):
            return TableSource._from_schema(obj)
        # ValueError (not a bare Exception) to match Table._from_obj; callers
        # catching Exception are unaffected.
        raise ValueError(f"Unknown table source: {obj}")

    @staticmethod
    def _from_none() -> TableSource:
        """Returns a TableSource for a table created without any source."""
        # TODO: temporary workaround -- uses an empty schema. This is meant for
        # creating temp mutable tables, which do not exist yet; the intended
        # implementation wraps `PyTableSource.empty()`.
        return TableSource._from_schema(Schema._from_fields([]))

    @staticmethod
    def _from_schema(schema: Schema) -> TableSource:
        """Returns a TableSource for an empty table.

        NOTE: `schema` is currently ignored; the result is an empty view built
        from an empty DataFrame.
        """
        # TODO: temporary workaround until create_table is wired. There are no
        # mutable temp tables yet; the intended implementation wraps
        # `PyTableSource.from_schema(schema._schema)`.
        return TableSource.from_df(DataFrame._from_pylist([]))

    @staticmethod
    def _from_path(path: str) -> TableSource:
        """Placeholder for creating a TableSource from a data path.

        Raises:
            NotImplementedError: Always; path sources are not supported yet.
        """
        # for supporting daft.create_table("t", "/path/to/data") <-> CREATE TABLE t AS '/path/to/my.data'
        raise NotImplementedError("creating a table source from a path is not yet supported.")
0 commit comments