From 949fbea8816c20d5eeedf71a6f30687df6892a4a Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Sat, 24 Aug 2024 06:58:35 -0800 Subject: [PATCH] feat(api): support `SchemaLike` in `Backend.create_table()` (#9885) Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com> --- ibis/backends/bigquery/__init__.py | 4 +++- ibis/backends/clickhouse/__init__.py | 9 ++++----- ibis/backends/datafusion/__init__.py | 4 +++- ibis/backends/duckdb/__init__.py | 6 +++++- ibis/backends/exasol/__init__.py | 4 +++- ibis/backends/impala/__init__.py | 6 ++++-- ibis/backends/mssql/__init__.py | 4 +++- ibis/backends/mysql/__init__.py | 4 +++- ibis/backends/oracle/__init__.py | 4 +++- ibis/backends/pandas/__init__.py | 2 ++ ibis/backends/polars/__init__.py | 2 +- ibis/backends/postgres/__init__.py | 4 +++- ibis/backends/pyspark/__init__.py | 3 ++- ibis/backends/risingwave/__init__.py | 5 ++++- ibis/backends/snowflake/__init__.py | 4 +++- ibis/backends/sqlite/__init__.py | 2 +- ibis/backends/tests/test_client.py | 22 +++++----------------- ibis/backends/trino/__init__.py | 4 +++- 18 files changed, 55 insertions(+), 38 deletions(-) diff --git a/ibis/backends/bigquery/__init__.py b/ibis/backends/bigquery/__init__.py index a1bef8f57f2f..adc502ef8d34 100644 --- a/ibis/backends/bigquery/__init__.py +++ b/ibis/backends/bigquery/__init__.py @@ -923,7 +923,7 @@ def create_table( | pl.LazyFrame | None = None, *, - schema: ibis.Schema | None = None, + schema: sch.SchemaLike | None = None, database: str | None = None, temp: bool = False, overwrite: bool = False, @@ -972,6 +972,8 @@ def create_table( """ if obj is None and schema is None: raise com.IbisError("One of the `schema` or `obj` parameter is required") + if schema is not None: + schema = ibis.schema(schema) if isinstance(obj, ir.Table) and schema is not None: if not schema.equals(obj.schema()): diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py index 05016732727b..eb11255d4222 100644 --- a/ibis/backends/clickhouse/__init__.py +++ b/ibis/backends/clickhouse/__init__.py @@ -611,7 +611,7 @@ def create_table( | pl.LazyFrame | None = None, *, - schema: ibis.Schema | None = None, + schema: sch.SchemaLike | None = None, database: str | None = None, temp: bool = False, overwrite: bool = False, @@ -666,13 +666,12 @@ def create_table( if obj is None and schema is None: raise com.IbisError("The `schema` or `obj` parameter is required") + if schema is not None: + schema = ibis.schema(schema) if obj is not None and not isinstance(obj, ir.Expr): obj = ibis.memtable(obj, schema=schema) - if schema is None: - schema = obj.schema() - this = sge.Schema( this=sg.table(name, db=database), expressions=[ @@ -680,7 +679,7 @@ def create_table( this=sg.to_identifier(name, quoted=self.compiler.quoted), kind=self.compiler.type_mapper.from_ibis(typ), ) - for name, typ in schema.items() + for name, typ in (schema or obj.schema()).items() ], ) properties = [ diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index fc8108602490..ee520f4f2f57 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -596,7 +596,7 @@ def create_table( | pl.LazyFrame | None = None, *, - schema: sch.Schema | None = None, + schema: sch.SchemaLike | None = None, database: str | None = None, temp: bool = False, overwrite: bool = False, @@ -625,6 +625,8 @@ def create_table( """ if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") + if schema is not None: + schema = ibis.schema(schema) properties = [] diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index f1e2f647b156..13890d1b5d01 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -41,6 +41,8 @@ import torch from fsspec import AbstractFileSystem + from ibis.expr.schema import SchemaLike + _UDF_INPUT_TYPE_MAPPING = { InputType.PYARROW: duckdb.functional.ARROW, @@ -103,7 +105,7 @@ def create_table( | pl.LazyFrame | None = None, *, - schema: ibis.Schema | None = None, + schema: SchemaLike | None = None, database: str | None = None, temp: bool = False, overwrite: bool = False, @@ -147,6 +149,8 @@ def create_table( if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") + if schema is not None: + schema = ibis.schema(schema) properties = [] diff --git a/ibis/backends/exasol/__init__.py b/ibis/backends/exasol/__init__.py index 759dfc940d1d..c3b2a1fe8233 100644 --- a/ibis/backends/exasol/__init__.py +++ b/ibis/backends/exasol/__init__.py @@ -314,7 +314,7 @@ def create_table( | pl.LazyFrame | None = None, *, - schema: sch.Schema | None = None, + schema: sch.SchemaLike | None = None, database: str | None = None, overwrite: bool = False, temp: bool = False, @@ -342,6 +342,8 @@ def create_table( """ if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") + if schema is not None: + schema = ibis.schema(schema) if temp: raise com.UnsupportedOperationError( diff --git a/ibis/backends/impala/__init__.py b/ibis/backends/impala/__init__.py index 2baa4cbd7e8f..04aebafaf6b1 100644 --- a/ibis/backends/impala/__init__.py +++ b/ibis/backends/impala/__init__.py @@ -463,7 +463,7 @@ def create_table( | pl.LazyFrame | None = None, *, - schema=None, + schema: sch.SchemaLike | None = None, database=None, temp: bool | None = None, overwrite: bool = False, @@ -510,6 +510,8 @@ def create_table( """ if obj is None and schema is None: raise com.IbisError("The schema or obj parameter is required") + if schema is not None: + schema = ibis.schema(schema) if temp is not None: raise NotImplementedError( @@ -547,7 +549,7 @@ def create_table( self._safe_exec_sql( CreateTableWithSchema( name, - schema if schema is not None else obj.schema(), + schema or obj.schema(), database=database or self.current_database, format=format, external=external, diff --git a/ibis/backends/mssql/__init__.py b/ibis/backends/mssql/__init__.py index 508bbcf43830..9367adcd30e7 100644 --- a/ibis/backends/mssql/__init__.py +++ b/ibis/backends/mssql/__init__.py @@ -564,7 +564,7 @@ def create_table( | pl.LazyFrame | None = None, *, - schema: sch.Schema | None = None, + schema: sch.SchemaLike | None = None, database: str | None = None, temp: bool = False, overwrite: bool = False, @@ -605,6 +605,8 @@ def create_table( """ if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") + if schema is not None: + schema = ibis.schema(schema) if temp and overwrite: raise ValueError( diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py index 7b279fd1326f..5a0ef74edbad 100644 --- a/ibis/backends/mysql/__init__.py +++ b/ibis/backends/mysql/__init__.py @@ -388,13 +388,15 @@ def create_table( | pl.LazyFrame | None = None, *, - schema: ibis.Schema | None = None, + schema: sch.SchemaLike | None = None, database: str | None = None, temp: bool = False, overwrite: bool = False, ) -> ir.Table: if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") + if schema is not None: + schema = ibis.schema(schema) properties = [] diff --git a/ibis/backends/oracle/__init__.py b/ibis/backends/oracle/__init__.py index c4d43f280ac8..9005f96c5b82 100644 --- a/ibis/backends/oracle/__init__.py +++ b/ibis/backends/oracle/__init__.py @@ -374,7 +374,7 @@ def create_table( | pl.LazyFrame | None = None, *, - schema: ibis.Schema | None = None, + schema: sch.SchemaLike | None = None, database: str | None = None, temp: bool = False, overwrite: bool = False, @@ -403,6 +403,8 @@ def create_table( """ if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") + if schema is not None: + schema = ibis.schema(schema) properties = [] diff --git a/ibis/backends/pandas/__init__.py b/ibis/backends/pandas/__init__.py index a1c0eddd91f4..2d2703a89eac 100644 --- a/ibis/backends/pandas/__init__.py +++ b/ibis/backends/pandas/__init__.py @@ -210,6 +210,8 @@ def create_table( ) if obj is None and schema is None: raise com.IbisError("The schema or obj parameter is required") + if schema is not None: + schema = ibis.schema(schema) if obj is not None: df = self._convert_object(obj) diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py index 4ec1f50805e0..a87ff7683c70 100644 --- a/ibis/backends/polars/__init__.py +++ b/ibis/backends/polars/__init__.py @@ -362,7 +362,7 @@ def create_table( | pl.LazyFrame | None = None, *, - schema: ibis.Schema | None = None, + schema: sch.SchemaLike | None = None, database: str | None = None, temp: bool | None = None, overwrite: bool = False, diff --git a/ibis/backends/postgres/__init__.py b/ibis/backends/postgres/__init__.py index 616480757336..eed7c5eb86c9 100644 --- a/ibis/backends/postgres/__init__.py +++ b/ibis/backends/postgres/__init__.py @@ -626,7 +626,7 @@ def create_table( | pl.LazyFrame | None = None, *, - schema: ibis.Schema | None = None, + schema: sch.SchemaLike | None = None, database: str | None = None, temp: bool = False, overwrite: bool = False, @@ -655,6 +655,8 @@ def create_table( """ if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") + if schema is not None: + schema = ibis.schema(schema) properties = [] diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index bcdde50a2810..a9af6b75ad2a 100644 --- a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py @@ -547,7 +547,7 @@ def create_table( ir.Table | pd.DataFrame | pa.Table | pl.DataFrame | pl.LazyFrame | None ) = None, *, - schema: sch.Schema | None = None, + schema: sch.SchemaLike | None = None, database: str | None = None, temp: bool | None = None, overwrite: bool = False, @@ -608,6 +608,7 @@ def create_table( df = self._session.sql(query) df.write.saveAsTable(name, format=format, mode=mode) elif schema is not None: + schema = ibis.schema(schema) schema = PySparkSchema.from_ibis(schema) with self._active_catalog_database(catalog, db): self._session.catalog.createTable(name, schema=schema, format=format) diff --git a/ibis/backends/risingwave/__init__.py b/ibis/backends/risingwave/__init__.py index 2270a67dc998..b6ec548b119b 100644 --- a/ibis/backends/risingwave/__init__.py +++ b/ibis/backends/risingwave/__init__.py @@ -14,6 +14,7 @@ import ibis.backends.sql.compilers as sc import ibis.common.exceptions as com import ibis.expr.operations as ops +import ibis.expr.schema as sch import ibis.expr.types as ir from ibis import util from ibis.backends.postgres import Backend as PostgresBackend @@ -130,7 +131,7 @@ def create_table( | pl.LazyFrame | None = None, *, - schema: ibis.Schema | None = None, + schema: sch.SchemaLike | None = None, database: str | None = None, temp: bool = False, overwrite: bool = False, @@ -177,6 +178,8 @@ def create_table( """ if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") + if schema is not None: + schema = ibis.schema(schema) if connector_properties is not None and ( encode_format is None or data_format is None diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index 4e3926b9b8a5..9d0ec35497c3 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -765,7 +765,7 @@ def create_table( | pl.LazyFrame | None = None, *, - schema: sch.Schema | None = None, + schema: sch.SchemaLike | None = None, database: str | None = None, temp: bool = False, overwrite: bool = False, @@ -797,6 +797,8 @@ def create_table( """ if obj is None and schema is None: raise ValueError("Either `obj` or `schema` must be specified") + if schema is not None: + schema = ibis.schema(schema) quoted = self.compiler.quoted diff --git a/ibis/backends/sqlite/__init__.py b/ibis/backends/sqlite/__init__.py index e770cecd72be..b9422b21d335 100644 --- a/ibis/backends/sqlite/__init__.py +++ b/ibis/backends/sqlite/__init__.py @@ -436,7 +436,7 @@ def create_table( | pl.LazyFrame | None = None, *, - schema: ibis.Schema | None = None, + schema: sch.SchemaLike | None = None, database: str | None = None, temp: bool = False, overwrite: bool = False, diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 49848d8f43dc..55dc7c732f53 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -85,16 +85,11 @@ def _create_temp_table_with_schema(backend, con, temp_table_name, schema, data=N "sch", [ None, - ibis.schema( - dict( - first_name="string", - last_name="string", - department_name="string", - salary="float64", - ) - ), + dict(first_name="string", salary="float64"), + dict(first_name="string", salary="float64").items(), + ibis.schema(dict(first_name="string", salary="float64")), ], - ids=["no_schema", "schema"], + ids=["no_schema", "dict_schema", "tuples", "schema"], ) @pytest.mark.notimpl(["druid"]) @pytest.mark.notimpl( @@ -102,14 +97,7 @@ def _create_temp_table_with_schema(backend, con, temp_table_name, schema, data=N reason="Flink backend supports creating only TEMPORARY VIEW for in-memory data.", ) def test_create_table(backend, con, temp_table, func, sch): - df = pd.DataFrame( - { - "first_name": ["A", "B", "C"], - "last_name": ["D", "E", "F"], - "department_name": ["AA", "BB", "CC"], - "salary": [100.0, 200.0, 300.0], - } - ) + df = pd.DataFrame({"first_name": ["A", "B", "C"], "salary": [100.0, 200.0, 300.0]}) con.create_table(temp_table, func(df), schema=sch) result = ( diff --git a/ibis/backends/trino/__init__.py b/ibis/backends/trino/__init__.py index c1cc00aa3988..e694f042fae1 100644 --- a/ibis/backends/trino/__init__.py +++ b/ibis/backends/trino/__init__.py @@ -395,7 +395,7 @@ def create_table( | pl.LazyFrame | None = None, *, - schema: sch.Schema | None = None, + schema: sch.SchemaLike | None = None, database: str | None = None, temp: bool = False, overwrite: bool = False, @@ -435,6 +435,8 @@ def create_table( """ if obj is None and schema is None: raise com.IbisError("One of the `schema` or `obj` parameter is required") + if schema is not None: + schema = ibis.schema(schema) if temp: raise NotImplementedError(