42
42
43
43
from abc import ABC , abstractmethod
44
44
from collections .abc import Sequence
45
- from daft .daft import catalog as native_catalog
45
+ from daft .daft import PyTableSource , catalog as native_catalog
46
46
from daft .daft import PyIdentifier , PyTable
47
47
from daft .logical .builder import LogicalPlanBuilder
48
48
49
49
from daft .dataframe import DataFrame
50
50
51
51
from typing import TYPE_CHECKING
52
52
53
+ from daft .logical .schema import Schema
54
+
53
55
if TYPE_CHECKING :
54
56
from daft .dataframe .dataframe import ColumnInputType
55
57
@@ -286,9 +288,11 @@ class Table(ABC):
286
288
"""Interface for python table implementations."""
287
289
288
290
@staticmethod
def from_df(name: str, dataframe: DataFrame) -> Table:
    """Returns a read-only table backed by the DataFrame.

    Args:
        name: name passed through to the created table.
        dataframe: DataFrame that backs the table.

    Returns:
        A ``MemoryTable`` constructed from ``name`` and ``dataframe``.
    """
    # Imported at call time rather than module load time
    # NOTE(review): presumably to avoid an import cycle with daft.catalog.__memory — confirm.
    from daft.catalog.__memory import MemoryTable

    return MemoryTable(name, dataframe)
292
296
293
297
@staticmethod
294
298
def from_iceberg (obj : object ) -> Table :
@@ -314,10 +318,24 @@ def from_unity(obj: object) -> Table:
314
318
@staticmethod
315
319
def _from_obj (obj : object ) -> Table :
316
320
"""Returns a Daft Table from a supported object type or raises an error."""
317
- if isinstance (obj , DataFrame ):
318
- return Table .from_df (obj )
319
321
raise ValueError (f"Unsupported table type: { type (obj )} " )
320
322
323
+ # TODO catalog APIs part 3
324
+ # @property
325
+ # @abstractmethod
326
+ # def name(self) -> str:
327
+ # """Returns the table name."""
328
+
329
+ # TODO catalog APIs part 3
330
+ # @property
331
+ # @abstractmethod
332
+ # def inner(self) -> object | None:
333
+ # """Returns the inner table object if this is an adapter."""
334
+
335
@abstractmethod
def read(self) -> DataFrame:
    """Returns a DataFrame from this table.

    Abstract: concrete table implementations must override this method.
    """
338
+
321
339
# TODO deprecated catalog APIs #3819
322
340
def to_dataframe (self ) -> DataFrame :
323
341
"""DEPRECATED: Please use `read` instead; version 0.5.0!"""
@@ -327,14 +345,62 @@ def to_dataframe(self) -> DataFrame:
327
345
)
328
346
return self .read ()
329
347
330
- @abstractmethod
331
- def read (self ) -> DataFrame :
332
- """Returns a DataFrame from this table."""
333
-
334
348
def select(self, *columns: ColumnInputType) -> DataFrame:
    """Returns a DataFrame from this table with the selected columns.

    Args:
        *columns: columns forwarded unchanged to ``DataFrame.select``.

    Returns:
        The result of calling ``select(*columns)`` on ``self.read()``.
    """
    frame = self.read()
    return frame.select(*columns)
337
351
338
352
def show(self, n: int = 8) -> None:
    """Shows the first n rows from this table.

    Args:
        n: number of rows to show; defaults to 8.
    """
    frame = self.read()
    # Forward whatever DataFrame.show returns, as the original did.
    return frame.show(n)
355
+
356
+
357
class TableSource:
    """A TableSource is used to create a new table; this could be a Schema or DataFrame.

    Instances wrap a native ``PyTableSource`` in ``_source`` and are produced by the
    static factory methods; direct construction via ``__init__`` is rejected.
    """

    # Native table source handle, set by the factory methods.
    _source: PyTableSource

    def __init__(self) -> None:
        """Always raises; use one of the static factory methods instead.

        Raises:
            ValueError: unconditionally.
        """
        raise ValueError("We do not support creating a TableSource via __init__")

    @staticmethod
    def from_df(df: DataFrame) -> TableSource:
        """Returns a TableSource built from the DataFrame's underlying plan builder."""
        # __init__ raises by design, so allocate with __new__ and set the field directly.
        source = TableSource.__new__(TableSource)
        source._source = PyTableSource.from_builder(df._builder._builder)
        return source

    @staticmethod
    def _from_obj(obj: object = None) -> TableSource:
        """Returns a TableSource for a supported object, dispatching on its type.

        Args:
            obj: ``None``, a ``DataFrame``, a ``str`` path, or a ``Schema``.

        Raises:
            NotImplementedError: if ``obj`` is a ``str`` (paths are not supported yet).
            ValueError: if ``obj`` is none of the supported types.
        """
        # TODO for future sources, consider https://github.com/Eventual-Inc/Daft/pull/2864
        if obj is None:
            return TableSource._from_none()
        if isinstance(obj, DataFrame):
            return TableSource.from_df(obj)
        if isinstance(obj, str):
            return TableSource._from_path(obj)
        if isinstance(obj, Schema):
            return TableSource._from_schema(obj)
        # ValueError (not a bare Exception) to match Table._from_obj; callers that
        # catch Exception are unaffected.
        raise ValueError(f"Unknown table source: {obj}")

    @staticmethod
    def _from_none() -> TableSource:
        """Returns the TableSource used when no source object is given.

        Currently delegates to ``_from_schema`` with an empty schema.
        """
        # for creating temp mutable tables, but we don't have those yet
        # s = TableSource.__new__(TableSource)
        # s._source = PyTableSource.empty()
        # return s
        # todo temp workaround just use an empty schema
        return TableSource._from_schema(Schema._from_fields([]))

    @staticmethod
    def _from_schema(schema: Schema) -> TableSource:
        """Returns a TableSource for the given schema.

        NOTE: as a temporary workaround ``schema`` is not used yet; the source is
        built from ``DataFrame._from_pylist([])`` instead.
        """
        # we don't have mutable temp tables, so just make an empty view
        # s = TableSource.__new__(TableSource)
        # s._source = PyTableSource.from_schema(schema._schema)
        # return s
        # todo temp workaround until create_table is wired
        return TableSource.from_df(DataFrame._from_pylist([]))

    @staticmethod
    def _from_path(path: str) -> TableSource:
        """Placeholder for creating a TableSource from a path.

        Raises:
            NotImplementedError: always; path sources are not yet supported.
        """
        # for supporting daft.create_table("t", "/path/to/data") <-> CREATE TABLE t AS '/path/to/my.data'
        raise NotImplementedError("creating a table source from a path is not yet supported.")
0 commit comments