Skip to content

Commit

Permalink
refactor(api): remove schema (ibis-project#10149)
Browse files Browse the repository at this point in the history
BREAKING CHANGE: Removed hierarchical usage of schema. 

Ibis uses the following naming conventions:
- schema: a mapping of column names to datatypes
- database: a collection of tables
- catalog: a collection of databases

---------

Co-authored-by: Gil Forsyth <[email protected]>
Co-authored-by: Gil Forsyth <[email protected]>
  • Loading branch information
3 people committed Sep 24, 2024
1 parent fc626cc commit 4957854
Show file tree
Hide file tree
Showing 24 changed files with 83 additions and 373 deletions.
5 changes: 1 addition & 4 deletions docs/backends/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,7 @@ def find_member_with_docstring(member):
if base not in resolved_bases:
resolved_bases.append(base)

# Remove `CanCreateSchema` and `CanListSchema` since they are deprecated
# and we don't want to document their existence.
filtered_bases = filter(lambda x: "schema" not in x.name.lower(), resolved_bases)
for base in filtered_bases:
for base in resolved_bases:
try:
parent_member = get_callable(base, member.name)
except KeyError:
Expand Down
38 changes: 0 additions & 38 deletions ibis/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,44 +741,6 @@ def drop_database(
"""


# TODO: remove this for 10.0
class CanListSchema:
@util.deprecated(
instead="Use `list_databases` instead`", as_of="9.0", removed_in="10.0"
)
def list_schemas(
self, like: str | None = None, database: str | None = None
) -> list[str]:
return self.list_databases(like=like, catalog=database)

@property
@util.deprecated(
instead="Use `Backend.current_database` instead.",
as_of="9.0",
removed_in="10.0",
)
def current_schema(self) -> str:
return self.current_database


class CanCreateSchema(CanListSchema):
@util.deprecated(
instead="Use `create_database` instead", as_of="9.0", removed_in="10.0"
)
def create_schema(
self, name: str, database: str | None = None, force: bool = False
) -> None:
self.create_database(name=name, catalog=database, force=force)

@util.deprecated(
instead="Use `drop_database` instead", as_of="9.0", removed_in="10.0"
)
def drop_schema(
self, name: str, database: str | None = None, force: bool = False
) -> None:
self.drop_database(name=name, catalog=database, force=force)


class CacheEntry(NamedTuple):
orig_op: ops.Relation
cached_op_ref: weakref.ref[ops.Relation]
Expand Down
64 changes: 16 additions & 48 deletions ibis/backends/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import ibis.expr.schema as sch
import ibis.expr.types as ir
from ibis import util
from ibis.backends import CanCreateDatabase, CanCreateSchema
from ibis.backends import CanCreateDatabase
from ibis.backends.bigquery.client import (
bigquery_param,
parse_project_and_dataset,
Expand Down Expand Up @@ -155,7 +155,7 @@ def _postprocess_arrow(
return table_or_batch.rename_columns(names)


class Backend(SQLBackend, CanCreateDatabase, CanCreateSchema):
class Backend(SQLBackend, CanCreateDatabase):
name = "bigquery"
compiler = sc.bigquery.compiler
supports_python_udfs = False
Expand Down Expand Up @@ -520,6 +520,10 @@ def disconnect(self) -> None:

def _parse_project_and_dataset(self, dataset) -> tuple[str, str]:
if isinstance(dataset, sge.Table):
if (sg_cat := dataset.args["catalog"]) is not None:
sg_cat.args["quoted"] = False
if (sg_db := dataset.args["db"]) is not None:
sg_db.args["quoted"] = False
dataset = dataset.sql(self.dialect)
if not dataset and not self.dataset:
raise ValueError("Unable to determine BigQuery dataset.")
Expand Down Expand Up @@ -582,9 +586,11 @@ def drop_database(
self.raw_sql(stmt.sql(self.name))

def table(
self, name: str, database: str | None = None, schema: str | None = None
self,
name: str,
database: str | None = None,
) -> ir.Table:
table_loc = self._warn_and_create_table_loc(database, schema)
table_loc = self._to_sqlglot_table(database)
table = sg.parse_one(f"`{name}`", into=sge.Table, read=self.name)

# Bigquery, unlike other backends, had existing support for specifying
Expand Down Expand Up @@ -612,10 +618,7 @@ def table(
else:
db = table.db

database = (
sg.table(None, db=db, catalog=catalog, quoted=False).sql(dialect=self.name)
or None
)
database = sg.table(None, db=db, catalog=catalog, quoted=False) or None

project, dataset = self._parse_project_and_dataset(database)

Expand Down Expand Up @@ -722,7 +725,6 @@ def insert(
self,
table_name: str,
obj: pd.DataFrame | ir.Table | list | dict,
schema: str | None = None,
database: str | None = None,
overwrite: bool = False,
):
Expand All @@ -734,15 +736,13 @@ def insert(
The name of the table into which data will be inserted
obj
The source data or expression to insert
schema
The name of the schema that the table is located in
database
Name of the attached database that the table is located in.
overwrite
If `True` then replace existing contents of table
"""
table_loc = self._warn_and_create_table_loc(database, schema)
table_loc = self._to_sqlglot_table(database)
catalog, db = self._to_catalog_db_tuple(table_loc)
if catalog is None:
catalog = self.current_catalog
Expand Down Expand Up @@ -896,7 +896,6 @@ def list_tables(
self,
like: str | None = None,
database: tuple[str, str] | str | None = None,
schema: str | None = None,
) -> list[str]:
"""List the tables in the database.
Expand Down Expand Up @@ -924,10 +923,8 @@ def list_tables(
To specify a table in a separate BigQuery dataset, you can pass in the
project and dataset as a string `"project.dataset"`, or as a tuple of
strings `(project, dataset)`.
schema
[deprecated] The schema (dataset) inside `database` to perform the list against.
"""
table_loc = self._warn_and_create_table_loc(database, schema)
table_loc = self._to_sqlglot_table(database)

project, dataset = self._parse_project_and_dataset(table_loc)
dataset_ref = bq.DatasetReference(project, dataset)
Expand Down Expand Up @@ -1090,11 +1087,10 @@ def drop_table(
self,
name: str,
*,
schema: str | None = None,
database: tuple[str | str] | str | None = None,
force: bool = False,
) -> None:
table_loc = self._warn_and_create_table_loc(database, schema)
table_loc = self._to_sqlglot_table(database)
catalog, db = self._to_catalog_db_tuple(table_loc)
stmt = sge.Drop(
kind="TABLE",
Expand All @@ -1112,11 +1108,10 @@ def create_view(
name: str,
obj: ir.Table,
*,
schema: str | None = None,
database: str | None = None,
overwrite: bool = False,
) -> ir.Table:
table_loc = self._warn_and_create_table_loc(database, schema)
table_loc = self._to_sqlglot_table(database)
catalog, db = self._to_catalog_db_tuple(table_loc)

stmt = sge.Create(
Expand All @@ -1137,11 +1132,10 @@ def drop_view(
self,
name: str,
*,
schema: str | None = None,
database: str | None = None,
force: bool = False,
) -> None:
table_loc = self._warn_and_create_table_loc(database, schema)
table_loc = self._to_sqlglot_table(database)
catalog, db = self._to_catalog_db_tuple(table_loc)

stmt = sge.Drop(
Expand Down Expand Up @@ -1169,32 +1163,6 @@ def _register_udfs(self, expr: ir.Expr) -> None:
def _safe_raw_sql(self, *args, **kwargs):
yield self.raw_sql(*args, **kwargs)

# TODO: remove when the schema kwarg is removed
def _warn_and_create_table_loc(self, database=None, schema=None):
if schema is not None:
self._warn_schema()
if database is not None and schema is not None:
if isinstance(database, str):
table_loc = f"{database}.{schema}"
elif isinstance(database, tuple):
table_loc = database + schema
elif schema is not None:
table_loc = schema
elif database is not None:
table_loc = database
else:
table_loc = None

table_loc = self._to_sqlglot_table(table_loc)

if table_loc is not None:
if (sg_cat := table_loc.args["catalog"]) is not None:
sg_cat.args["quoted"] = False
if (sg_db := table_loc.args["db"]) is not None:
sg_db.args["quoted"] = False

return table_loc


def compile(expr, params=None, **kwargs):
"""Compile an expression for BigQuery."""
Expand Down
26 changes: 10 additions & 16 deletions ibis/backends/bigquery/tests/system/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@ def test_list_tables(con):
tables = con.list_tables(like="functional_alltypes")
assert set(tables) == {"functional_alltypes", "functional_alltypes_parted"}

pypi_tables = [
"external",
"native",
]

assert con.list_tables()

assert con.list_tables(database="ibis-gbq.pypi") == pypi_tables
assert con.list_tables(database=("ibis-gbq", "pypi")) == pypi_tables


def test_current_catalog(con):
assert con.current_catalog == con.billing_project
Expand Down Expand Up @@ -386,22 +396,6 @@ def test_create_table_with_options(con):
con.drop_table(name)


def test_list_tables_schema_warning_refactor(con):
pypi_tables = [
"external",
"native",
]

assert con.list_tables()

# Warn but succeed for schema list
with pytest.raises(FutureWarning):
assert con.list_tables(schema="pypi") == pypi_tables

assert con.list_tables(database="ibis-gbq.pypi") == pypi_tables
assert con.list_tables(database=("ibis-gbq", "pypi")) == pypi_tables


def test_create_temp_table_from_scratch(project_id, dataset_id):
con = ibis.bigquery.connect(project_id=project_id, dataset_id=dataset_id)
name = gen_name("bigquery_temp_table")
Expand Down
9 changes: 0 additions & 9 deletions ibis/backends/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from ibis.backends import (
CanCreateCatalog,
CanCreateDatabase,
CanListSchema,
_get_backend_names,
)
from ibis.conftest import WINDOWS
Expand Down Expand Up @@ -424,14 +423,6 @@ def con_no_data(backend_no_data):
return backend_no_data.connection


@pytest.fixture(scope="session")
def con_list_schema(con):
if isinstance(con, CanListSchema):
return con
else:
pytest.skip(f"{con.name} backend cannot create schemas")


@pytest.fixture(scope="session")
def con_create_catalog(con):
if isinstance(con, CanCreateCatalog):
Expand Down
14 changes: 7 additions & 7 deletions ibis/backends/datafusion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import ibis.expr.schema as sch
import ibis.expr.types as ir
from ibis import util
from ibis.backends import CanCreateCatalog, CanCreateDatabase, CanCreateSchema, NoUrl
from ibis.backends import CanCreateCatalog, CanCreateDatabase, NoUrl
from ibis.backends.sql import SQLBackend
from ibis.backends.sql.compilers.base import C
from ibis.common.dispatch import lazy_singledispatch
Expand Down Expand Up @@ -69,7 +69,7 @@ def as_nullable(dtype: dt.DataType) -> dt.DataType:
return dtype.copy(nullable=True)


class Backend(SQLBackend, CanCreateCatalog, CanCreateDatabase, CanCreateSchema, NoUrl):
class Backend(SQLBackend, CanCreateCatalog, CanCreateDatabase, NoUrl):
name = "datafusion"
supports_arrays = True
compiler = sc.datafusion.compiler
Expand Down Expand Up @@ -674,8 +674,10 @@ def create_table(
return self.table(name, database=database)

def truncate_table(
self, name: str, database: str | None = None, schema: str | None = None
) -> None:
self,
name: str,
database: str | None = None,
):
"""Delete all rows from a table.
Parameters
Expand All @@ -684,14 +686,12 @@ def truncate_table(
Table name
database
Database name
schema
Schema name
"""
# datafusion doesn't support `TRUNCATE TABLE` so we use `DELETE FROM`
#
# however datafusion as of 34.0.0 doesn't implement DELETE DML yet
table_loc = self._warn_and_create_table_loc(database, schema)
table_loc = self._to_sqlglot_table(database)
catalog, db = self._to_catalog_db_tuple(table_loc)

ident = sg.table(name, db=db, catalog=catalog).sql(self.dialect)
Expand Down
Loading

0 comments on commit 4957854

Please sign in to comment.