From c32604804fc478b1af0fd0821bae645a7e847023 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Thu, 13 Jun 2024 15:04:26 -0400 Subject: [PATCH 01/23] feat(sigcheck): first cut of signature checking utilities This is a fork of some of the utilities in Scott Sanderson's `python-interface` project (https://github.com/ssanderson/python-interface). While the upstream is no longer maintained, the goal of that project aligns quite well with some of the issues we face with maintaining consistent interfaces across backends. I'm grabbing a few of these utilities and ripping out Python 2 support, then I'm going to work on setting up tests for making sure that backend methods match the signatures in the parent `BaseBackend` class. --- ibis/backends/tests/signature/__init__.py | 0 .../tests/signature/tests/test_compatible.py | 48 ++++++++ ibis/backends/tests/signature/typecheck.py | 116 ++++++++++++++++++ 3 files changed, 164 insertions(+) create mode 100644 ibis/backends/tests/signature/__init__.py create mode 100644 ibis/backends/tests/signature/tests/test_compatible.py create mode 100644 ibis/backends/tests/signature/typecheck.py diff --git a/ibis/backends/tests/signature/__init__.py b/ibis/backends/tests/signature/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ibis/backends/tests/signature/tests/test_compatible.py b/ibis/backends/tests/signature/tests/test_compatible.py new file mode 100644 index 000000000000..7c0802bd0cc1 --- /dev/null +++ b/ibis/backends/tests/signature/tests/test_compatible.py @@ -0,0 +1,48 @@ +from __future__ import annotations # noqa: INP001 + +from inspect import signature + +import pytest +from pytest import param + +from ibis.backends.tests.signature.typecheck import compatible + + +@pytest.mark.parametrize( + "a, b, expected", + [ + param( + lambda posarg, *, kwarg1=None, kwarg2=None: ..., + lambda posarg, *, kwarg2=None, kwarg1=None: ..., + True, + id="swapped kwarg order", + ), + param( + lambda posarg, *, 
kwarg1=None, kwarg2=None, kwarg3=None: ..., + lambda posarg, *, kwarg2=None, kwarg1=None: ..., + True, + id="swapped kwarg order w/extra kwarg first", + ), + param( + lambda posarg, *, kwarg2=None, kwarg1=None: ..., + lambda posarg, *, kwarg1=None, kwarg2=None, kwarg3=None: ..., + True, + id="swapped kwarg order w/extra kwarg second", + ), + param( + lambda posarg, /, *, kwarg2=None, kwarg1=None: ..., + lambda posarg, *, kwarg1=None, kwarg2=None, kwarg3=None: ..., + False, + id="one positional only", + ), + param( + lambda posarg, *, kwarg1=None, kwarg2=None: ..., + lambda posarg, kwarg1=None, kwarg2=None: ..., + False, + id="not kwarg only", + ), + ], +) +def test_sigs_compatible(a, b, expected): + sig_a, sig_b = signature(a), signature(b) + assert compatible(sig_a, sig_b) == expected diff --git a/ibis/backends/tests/signature/typecheck.py b/ibis/backends/tests/signature/typecheck.py new file mode 100644 index 000000000000..58aef47b2489 --- /dev/null +++ b/ibis/backends/tests/signature/typecheck.py @@ -0,0 +1,116 @@ +"""The following was forked from the python-interface project: + +* Copyright (c) 2016-2021, Scott Sanderson + +Utilities for typed interfaces. +""" + +# ruff: noqa: D205, D415, D400 + +from __future__ import annotations + +from inspect import Parameter, Signature +from itertools import starmap, takewhile, zip_longest + + +def valfilter(f, d): + return {k: v for k, v in d.items() if f(v)} + + +def dzip(left, right): + return {k: (left.get(k), right.get(k)) for k in left.keys() & right.keys()} + + +def complement(f): + def not_f(*args, **kwargs): + return not f(*args, **kwargs) + + return not_f + + +def compatible(impl_sig: Signature, iface_sig: Signature) -> bool: + """Check whether ``impl_sig`` is compatible with ``iface_sig``. + + Parameters + ---------- + impl_sig + The signature of the implementation function. + iface_sig + The signature of the interface function. 
+ + In general, an implementation is compatible with an interface if any valid + way of passing parameters to the interface method is also valid for the + implementation. + + Consequently, the following differences are allowed between the signature + of an implementation method and the signature of its interface definition: + + 1. An implementation may add new arguments to an interface iff: + a. All new arguments have default values. + b. All new arguments accepted positionally (i.e. all non-keyword-only + arguments) occur after any arguments declared by the interface. + c. Keyword-only arguments may be reordered by the implementation. + + 2. For type-annotated interfaces, type annotations my differ as follows: + a. Arguments to implementations of an interface may be annotated with + a **superclass** of the type specified by the interface. + b. The return type of an implementation may be annotated with a + **subclass** of the type specified by the interface. + """ + # Unwrap to get the underlying inspect.Signature objects. 
+ return all( + [ + positionals_compatible( + takewhile(is_positional, impl_sig.parameters.values()), + takewhile(is_positional, iface_sig.parameters.values()), + ), + keywords_compatible( + valfilter(complement(is_positional), impl_sig.parameters), + valfilter(complement(is_positional), iface_sig.parameters), + ), + ] + ) + + +_POSITIONALS = frozenset([Parameter.POSITIONAL_ONLY, Parameter.POSITIONAL_OR_KEYWORD]) + + +def is_positional(arg): + return arg.kind in _POSITIONALS + + +def has_default(arg): + """Does ``arg`` provide a default?.""" + return arg.default is not Parameter.empty + + +def params_compatible(impl, iface): + if impl is None: + return False + + if iface is None: + return has_default(impl) + + return ( + impl.name == iface.name + and impl.kind == iface.kind + and has_default(impl) == has_default(iface) + and annotations_compatible(impl, iface) + ) + + +def positionals_compatible(impl_positionals, iface_positionals): + return all( + starmap(params_compatible, zip_longest(impl_positionals, iface_positionals)) + ) + + +def keywords_compatible(impl_keywords, iface_keywords): + return all(starmap(params_compatible, dzip(impl_keywords, iface_keywords).values())) + + +def annotations_compatible(impl, iface): + """Check whether the type annotations of an implementation are compatible with + the annotations of the interface it implements. 
+ """ + return impl.annotation == iface.annotation From e49caf7c1c0d40acb2765396a4f804f29e22a942 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Thu, 13 Jun 2024 15:51:55 -0400 Subject: [PATCH 02/23] test(signatures): add fixture that returns uninstantiated backend class --- ibis/backends/conftest.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index 55450270ab15..ff9741573bed 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -27,6 +27,7 @@ from ibis.util import promote_tuple if TYPE_CHECKING: + from ibis.backends import BaseBackend from ibis.backends.tests.base import BackendTest @@ -391,6 +392,22 @@ def _filter_none_from_raises(kwargs): item.add_marker(pytest.mark.xfail(reason=reason, **kwargs)) +def _get_backend_cls(backend_str: str): + """Convert a backend string to the test class for the backend.""" + backend_mod = importlib.import_module(f"ibis.backends.{backend_str}") + return backend_mod.Backend + + +@pytest.fixture(params=_get_backends_to_test(), scope="session") +def backend_cls(request) -> BaseBackend: + """Return the uninstantiated backend class, unconnected. 
+ + This is used for signature checking and nothing should be executed.""" + + cls = _get_backend_cls(request.param) + return cls + + @pytest.fixture(params=_get_backends_to_test(), scope="session") def backend(request, data_dir, tmp_path_factory, worker_id) -> BackendTest: """Return an instance of BackendTest, loaded with data.""" From 5655f80f2061ca4f15f53b0f897d1b3cb48c4de6 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Thu, 13 Jun 2024 16:32:47 -0400 Subject: [PATCH 03/23] feat(sigcheck): make annotation checks optional in signature comparisons --- .../tests/signature/tests/test_compatible.py | 81 +++++++++++++++++-- ibis/backends/tests/signature/typecheck.py | 32 ++++++-- 2 files changed, 100 insertions(+), 13 deletions(-) diff --git a/ibis/backends/tests/signature/tests/test_compatible.py b/ibis/backends/tests/signature/tests/test_compatible.py index 7c0802bd0cc1..6801b5dbbb60 100644 --- a/ibis/backends/tests/signature/tests/test_compatible.py +++ b/ibis/backends/tests/signature/tests/test_compatible.py @@ -1,6 +1,7 @@ from __future__ import annotations # noqa: INP001 from inspect import signature +from typing import Any import pytest from pytest import param @@ -8,8 +9,26 @@ from ibis.backends.tests.signature.typecheck import compatible +def a1(posarg: int): ... + + +def b1(posarg: str): ... + + +def a2(posarg: int, **kwargs: Any): ... +def b2(posarg: str, **kwargs): ... + + +def a3(posarg: int, other_kwarg: bool = True, **kwargs: Any): ... +def b3(posarg: str, **kwargs: Any): ... + + +def a4(posarg: int, other_kwarg=True, **kwargs: Any): ... +def b4(posarg: str, **kwargs: Any): ... 
+ + @pytest.mark.parametrize( - "a, b, expected", + "a, b, check_annotations", [ param( lambda posarg, *, kwarg1=None, kwarg2=None: ..., @@ -29,20 +48,72 @@ True, id="swapped kwarg order w/extra kwarg second", ), + param( + a1, + b1, + False, + id="annotations diff types w/out anno check", + ), + param( + a2, + b3, + False, + id="annotations different but parity in annotations", + ), + param( + a3, + b3, + False, + id="annotations different but parity in annotations for matching kwargs", + ), + param( + a4, + b4, + False, + id="annotations different but parity in annotations for matching kwargs", + ), + param( + a2, + b2, + False, + id="annotations different, no anno check, but missing annotation", + ), + ], +) +def test_sigs_compatible(a, b, check_annotations): + sig_a, sig_b = signature(a), signature(b) + assert compatible(sig_a, sig_b, check_annotations=check_annotations) + + +@pytest.mark.parametrize( + "a, b, check_annotations", + [ param( lambda posarg, /, *, kwarg2=None, kwarg1=None: ..., lambda posarg, *, kwarg1=None, kwarg2=None, kwarg3=None: ..., - False, + True, id="one positional only", ), param( lambda posarg, *, kwarg1=None, kwarg2=None: ..., lambda posarg, kwarg1=None, kwarg2=None: ..., - False, + True, id="not kwarg only", ), + param( + a1, + b1, + True, + id="annotations diff types w/anno check", + ), + param( + a2, + b3, + True, + id="annotations different but parity in annotations", + ), ], ) -def test_sigs_compatible(a, b, expected): +def test_sigs_incompatible(a, b, check_annotations): sig_a, sig_b = signature(a), signature(b) - assert compatible(sig_a, sig_b) == expected + assert not compatible(sig_a, sig_b, check_annotations=check_annotations) diff --git a/ibis/backends/tests/signature/typecheck.py b/ibis/backends/tests/signature/typecheck.py index 58aef47b2489..9ef0db514165 100644 --- a/ibis/backends/tests/signature/typecheck.py +++ b/ibis/backends/tests/signature/typecheck.py @@ -9,6 +9,7 @@ from __future__ import annotations +from 
functools import partial from inspect import Parameter, Signature from itertools import starmap, takewhile, zip_longest @@ -28,7 +29,9 @@ def not_f(*args, **kwargs): return not_f -def compatible(impl_sig: Signature, iface_sig: Signature) -> bool: +def compatible( + impl_sig: Signature, iface_sig: Signature, check_annotations: bool = True +) -> bool: """Check whether ``impl_sig`` is compatible with ``iface_sig``. Parameters @@ -37,6 +40,8 @@ def compatible(impl_sig: Signature, iface_sig: Signature) -> bool: The signature of the implementation function. iface_sig The signature of the interface function. + check_annotations + Whether to also compare signature annotations (default) vs only parameter names. In general, an implementation is compatible with an interface if any valid way of passing parameters to the interface method is also valid for the @@ -63,10 +68,12 @@ def compatible(impl_sig: Signature, iface_sig: Signature) -> bool: positionals_compatible( takewhile(is_positional, impl_sig.parameters.values()), takewhile(is_positional, iface_sig.parameters.values()), + check_annotations=check_annotations, ), keywords_compatible( valfilter(complement(is_positional), impl_sig.parameters), valfilter(complement(is_positional), iface_sig.parameters), + check_annotations=check_annotations, ), ] ) @@ -84,29 +91,38 @@ def has_default(arg): return arg.default is not Parameter.empty -def params_compatible(impl, iface): +def params_compatible(impl, iface, check_annotations=True): if impl is None: return False if iface is None: return has_default(impl) - return ( + checks = ( impl.name == iface.name and impl.kind == iface.kind and has_default(impl) == has_default(iface) - and annotations_compatible(impl, iface) ) + if check_annotations: + checks = checks and annotations_compatible(impl, iface) -def positionals_compatible(impl_positionals, iface_positionals): + return checks + + +def positionals_compatible(impl_positionals, iface_positionals, check_annotations=True): + 
params_compat = partial(params_compatible, check_annotations=check_annotations) return all( - starmap(params_compatible, zip_longest(impl_positionals, iface_positionals)) + starmap( + params_compat, + zip_longest(impl_positionals, iface_positionals), + ) ) -def keywords_compatible(impl_keywords, iface_keywords): - return all(starmap(params_compatible, dzip(impl_keywords, iface_keywords).values())) +def keywords_compatible(impl_keywords, iface_keywords, check_annotations=True): + params_compat = partial(params_compatible, check_annotations=check_annotations) + return all(starmap(params_compat, dzip(impl_keywords, iface_keywords).values())) def annotations_compatible(impl, iface): From 18995f8b625fb165554cb8a677aa590f30225f36 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Thu, 13 Jun 2024 16:50:15 -0400 Subject: [PATCH 04/23] chore(signature): swap order of arguments to `compatible` Should be `implementation, interface_def`, not the other way around --- ibis/backends/tests/test_signatures.py | 27 ++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 ibis/backends/tests/test_signatures.py diff --git a/ibis/backends/tests/test_signatures.py b/ibis/backends/tests/test_signatures.py new file mode 100644 index 000000000000..4761d2028ecf --- /dev/null +++ b/ibis/backends/tests/test_signatures.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +import inspect + +import pytest + +from ibis.backends import _FileIOHandler +from ibis.backends.tests.signature.typecheck import compatible + +params = [] + +for module in [_FileIOHandler]: + methods = list(filter(lambda x: not x.startswith("_"), dir(module))) + for method in methods: + params.append((_FileIOHandler, method)) + + +@pytest.mark.parametrize("base_cls, method", params) +def test_signatures(base_cls, method, backend_cls): + if not hasattr(backend_cls, method): + pytest.skip(f"Method {method} not present in {backend_cls}, skipping...") + + base_sig = inspect.signature(getattr(base_cls, 
method)) + backend_sig = inspect.signature(getattr(backend_cls, method)) + + # Usage is compatible(implementation_signature, defined_interface_signature, ...) + assert compatible(backend_sig, base_sig, check_annotations=False) From d50efd0a9ab0c0881068dfdcd62bce9f2014cb31 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 14 Jun 2024 14:47:05 -0400 Subject: [PATCH 05/23] test(signatures): add signature checks for SQL backends --- ibis/backends/conftest.py | 13 +++++++ ibis/backends/tests/test_signatures.py | 53 ++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index ff9741573bed..342f2ab79f08 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -408,6 +408,19 @@ def backend_cls(request) -> BaseBackend: return cls +@pytest.fixture( + params=_get_backends_to_test(discard=("dask", "pandas", "polars")), + scope="session", +) +def backend_sql_cls(request, data_dir, tmp_path_factory, worker_id): + """Return the uninstantiated backend class, unconnected. + + SQL backends only. 
+ This is used for signature checking and nothing should be executed.""" + cls = _get_backend_cls(request.param) + return cls + + @pytest.fixture(params=_get_backends_to_test(), scope="session") def backend(request, data_dir, tmp_path_factory, worker_id) -> BackendTest: """Return an instance of BackendTest, loaded with data.""" diff --git a/ibis/backends/tests/test_signatures.py b/ibis/backends/tests/test_signatures.py index 4761d2028ecf..05e21a52aa7a 100644 --- a/ibis/backends/tests/test_signatures.py +++ b/ibis/backends/tests/test_signatures.py @@ -4,15 +4,30 @@ import pytest -from ibis.backends import _FileIOHandler +from ibis.backends import ( + BaseBackend, + CanCreateCatalog, + CanCreateDatabase, + CanListCatalog, + CanListDatabase, + _FileIOHandler, +) +from ibis.backends.sql import SQLBackend from ibis.backends.tests.signature.typecheck import compatible params = [] -for module in [_FileIOHandler]: +for module in [ + BaseBackend, + CanCreateCatalog, + CanCreateDatabase, + CanListCatalog, + CanListDatabase, + _FileIOHandler, +]: methods = list(filter(lambda x: not x.startswith("_"), dir(module))) for method in methods: - params.append((_FileIOHandler, method)) + params.append((module, method)) @pytest.mark.parametrize("base_cls, method", params) @@ -20,8 +35,38 @@ def test_signatures(base_cls, method, backend_cls): if not hasattr(backend_cls, method): pytest.skip(f"Method {method} not present in {backend_cls}, skipping...") - base_sig = inspect.signature(getattr(base_cls, method)) + if not callable(base_method := getattr(base_cls, method)): + pytest.skip( + f"Method {method} in {base_cls} isn't callable, can't grab signature" + ) + + base_sig = inspect.signature(base_method) backend_sig = inspect.signature(getattr(backend_cls, method)) # Usage is compatible(implementation_signature, defined_interface_signature, ...) 
assert compatible(backend_sig, base_sig, check_annotations=False) + + +sql_backend_params = [] + +for module in [SQLBackend]: + methods = list(filter(lambda x: not x.startswith("_"), dir(module))) + for method in methods: + sql_backend_params.append((module, method)) + + +@pytest.mark.parametrize("base_cls, method", sql_backend_params) +def test_signatures_sql_backends(base_cls, method, backend_sql_cls): + if not hasattr(backend_sql_cls, method): + pytest.skip(f"Method {method} not present in {backend_sql_cls}, skipping...") + + if not callable(base_method := getattr(base_cls, method)): + pytest.skip( + f"Method {method} in {base_cls} isn't callable, can't grab signature" + ) + + base_sig = inspect.signature(getattr(base_method)) + backend_sig = inspect.signature(getattr(backend_sql_cls, method)) + + # Usage is compatible(implementation_signature, defined_interface_signature, ...) + assert compatible(backend_sig, base_sig, check_annotations=False) From 662afa5231067546ccd428080002a261c3b7009e Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 14 Jun 2024 14:47:46 -0400 Subject: [PATCH 06/23] test(sigcheck): add test for comparing pos-only vs positional args --- ibis/backends/tests/signature/tests/test_compatible.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ibis/backends/tests/signature/tests/test_compatible.py b/ibis/backends/tests/signature/tests/test_compatible.py index 6801b5dbbb60..b96607228cbc 100644 --- a/ibis/backends/tests/signature/tests/test_compatible.py +++ b/ibis/backends/tests/signature/tests/test_compatible.py @@ -27,6 +27,10 @@ def a4(posarg: int, other_kwarg=True, **kwargs: Any): ... def b4(posarg: str, **kwargs: Any): ... +def a5(posarg: int, /): ... +def b5(posarg2: str, /): ... 
+ + @pytest.mark.parametrize( "a, b, check_annotations", [ @@ -112,6 +116,12 @@ def test_sigs_compatible(a, b, check_annotations): True, id="annotations different but parity in annotations", ), + param( + a5, + b5, + False, + id="names different, but positional only", + ), ], ) def test_sigs_incompatible(a, b, check_annotations): From f00d8f11013b1bedc595fc800914e391df967852 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 14 Jun 2024 14:51:20 -0400 Subject: [PATCH 07/23] test(sigcheck): don't check `_FileIOHandler` as it is part of `BaseBackend` --- ibis/backends/tests/test_signatures.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ibis/backends/tests/test_signatures.py b/ibis/backends/tests/test_signatures.py index 05e21a52aa7a..8eecfa69f8db 100644 --- a/ibis/backends/tests/test_signatures.py +++ b/ibis/backends/tests/test_signatures.py @@ -10,7 +10,6 @@ CanCreateDatabase, CanListCatalog, CanListDatabase, - _FileIOHandler, ) from ibis.backends.sql import SQLBackend from ibis.backends.tests.signature.typecheck import compatible @@ -23,7 +22,6 @@ CanCreateDatabase, CanListCatalog, CanListDatabase, - _FileIOHandler, ]: methods = list(filter(lambda x: not x.startswith("_"), dir(module))) for method in methods: @@ -65,7 +63,7 @@ def test_signatures_sql_backends(base_cls, method, backend_sql_cls): f"Method {method} in {base_cls} isn't callable, can't grab signature" ) - base_sig = inspect.signature(getattr(base_method)) + base_sig = inspect.signature(base_method) backend_sig = inspect.signature(getattr(backend_sql_cls, method)) # Usage is compatible(implementation_signature, defined_interface_signature, ...) From 2624c505971a4a1c288ed451a25a56e8d96ce896 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 14 Jun 2024 14:56:53 -0400 Subject: [PATCH 08/23] refactor(read_csv): unify read_csv signatures across backends BREAKING CHANGE: The first argument to `read_csv` is renamed to `path` and is now a positional-only argument. 
All other arguments to `read_csv` are now keyword-only arguments. --- ibis/backends/__init__.py | 5 +++-- ibis/backends/bigquery/__init__.py | 2 +- ibis/backends/clickhouse/__init__.py | 2 ++ ibis/backends/dask/__init__.py | 11 ++++++++--- ibis/backends/datafusion/__init__.py | 2 +- ibis/backends/duckdb/__init__.py | 12 +++++++----- ibis/backends/flink/__init__.py | 8 +++++--- ibis/backends/flink/tests/test_ddl.py | 2 +- ibis/backends/pandas/__init__.py | 11 ++++++++--- ibis/backends/polars/__init__.py | 2 ++ ibis/backends/pyspark/__init__.py | 8 +++++--- ibis/backends/pyspark/tests/test_import_export.py | 2 +- ibis/backends/snowflake/__init__.py | 2 +- 13 files changed, 45 insertions(+), 24 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index 265d3ab76561..631e3f3e45a1 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -362,14 +362,15 @@ def read_parquet( ) def read_csv( - self, path: str | Path, table_name: str | None = None, **kwargs: Any + self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any ) -> ir.Table: """Register a CSV file as a table in the current backend. Parameters ---------- path - The data source. A string or Path to the CSV file. + The data source(s). A string or Path to the CSV file or directory + containing CSV files. table_name An optional name to use for the created table. This defaults to a sequentially generated name. diff --git a/ibis/backends/bigquery/__init__.py b/ibis/backends/bigquery/__init__.py index a1bef8f57f2f..fedafdd4176a 100644 --- a/ibis/backends/bigquery/__init__.py +++ b/ibis/backends/bigquery/__init__.py @@ -274,7 +274,7 @@ def read_parquet( ) def read_csv( - self, path: str | Path, table_name: str | None = None, **kwargs: Any + self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any ) -> ir.Table: """Read CSV data into a BigQuery table. 
diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py index d93a522c19c9..40e2e09c21bf 100644 --- a/ibis/backends/clickhouse/__init__.py +++ b/ibis/backends/clickhouse/__init__.py @@ -572,6 +572,8 @@ def read_parquet( def read_csv( self, path: str | Path, + /, + *, table_name: str | None = None, engine: str = "MergeTree", **kwargs: Any, diff --git a/ibis/backends/dask/__init__.py b/ibis/backends/dask/__init__.py index b8e217affccb..9840aba98fe7 100644 --- a/ibis/backends/dask/__init__.py +++ b/ibis/backends/dask/__init__.py @@ -108,13 +108,18 @@ def execute( return DaskExecutor.execute(expr.op(), backend=self, params=params) def read_csv( - self, source: str | pathlib.Path, table_name: str | None = None, **kwargs: Any + self, + path: str | pathlib.Path, + /, + *, + table_name: str | None = None, + **kwargs: Any, ): """Register a CSV file as a table in the current session. Parameters ---------- - source + path The data source. Can be a local or remote file, pathlike objects also accepted. table_name @@ -132,7 +137,7 @@ def read_csv( """ table_name = table_name or util.gen_name("read_csv") - df = dd.read_csv(source, **kwargs) + df = dd.read_csv(path, **kwargs) self.dictionary[table_name] = df return self.table(table_name) diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index e53e8112c1da..91a8d720d23c 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -426,7 +426,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: self.con.register_dataset(name=name, dataset=empty_dataset) def read_csv( - self, path: str | Path, table_name: str | None = None, **kwargs: Any + self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any ) -> ir.Table: """Register a CSV file as a table in the current database. 
diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index f1e2f647b156..26ecadad7918 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -636,7 +636,9 @@ def read_json( def read_csv( self, - source_list: str | list[str] | tuple[str], + path: str | list[str] | tuple[str], + /, + *, table_name: str | None = None, columns: Mapping[str, str | dt.DataType] | None = None, types: Mapping[str, str | dt.DataType] | None = None, @@ -646,9 +648,9 @@ def read_csv( Parameters ---------- - source_list - The data source(s). May be a path to a file or directory of CSV - files, or an iterable of CSV files. + path + The data source(s). May be a path to a file or directory of CSV files, or an + iterable of CSV files. table_name An optional name to use for the created table. This defaults to a sequentially generated name. @@ -713,7 +715,7 @@ def read_csv( │ 2.0 │ 3.0 │ │ └─────────┴─────────┴──────────────────────┘ """ - source_list = util.normalize_filenames(source_list) + source_list = util.normalize_filenames(path) if not table_name: table_name = util.gen_name("read_csv") diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py index f959ca1e5f0c..67ecff617f14 100644 --- a/ibis/backends/flink/__init__.py +++ b/ibis/backends/flink/__init__.py @@ -790,8 +790,10 @@ def read_parquet( def read_csv( self, path: str | Path, - schema: sch.Schema | None = None, + /, + *, table_name: str | None = None, + schema: sch.Schema | None = None, ) -> ir.Table: """Register a csv file as a table in the current database. @@ -799,11 +801,11 @@ def read_csv( ---------- path The data source. - schema - The schema for the new table. table_name An optional name to use for the created table. This defaults to a sequentially generated name. + schema + The schema for the new table. 
Returns ------- diff --git a/ibis/backends/flink/tests/test_ddl.py b/ibis/backends/flink/tests/test_ddl.py index c80ec2439f5e..39591fd448d6 100644 --- a/ibis/backends/flink/tests/test_ddl.py +++ b/ibis/backends/flink/tests/test_ddl.py @@ -507,7 +507,7 @@ def test_insert_simple_select(con, tempdir_sink_configs): def test_read_csv(con, awards_players_schema, csv_source_configs, table_name): source_configs = csv_source_configs("awards_players") table = con.read_csv( - path=source_configs["path"], + source_configs["path"], schema=awards_players_schema, table_name=table_name, ) diff --git a/ibis/backends/pandas/__init__.py b/ibis/backends/pandas/__init__.py index 402d058d2537..ef14f1f069bb 100644 --- a/ibis/backends/pandas/__init__.py +++ b/ibis/backends/pandas/__init__.py @@ -87,13 +87,18 @@ def from_dataframe( return client.table(name) def read_csv( - self, source: str | pathlib.Path, table_name: str | None = None, **kwargs: Any + self, + path: str | pathlib.Path, + /, + *, + table_name: str | None = None, + **kwargs: Any, ): """Register a CSV file as a table in the current session. Parameters ---------- - source + path The data source. Can be a local or remote file, pathlike objects also accepted. 
table_name @@ -111,7 +116,7 @@ def read_csv( """ table_name = table_name or util.gen_name("read_csv") - df = pd.read_csv(source, **kwargs) + df = pd.read_csv(path, **kwargs) self.dictionary[table_name] = df return self.table(table_name) diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py index 4ec1f50805e0..4293b0ae7d8d 100644 --- a/ibis/backends/polars/__init__.py +++ b/ibis/backends/polars/__init__.py @@ -165,6 +165,8 @@ def sql( def read_csv( self, path: str | Path | list[str | Path] | tuple[str | Path], + /, + *, table_name: str | None = None, **kwargs: Any, ) -> ir.Table: diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index 0ea49d20b2bc..47d25ba4e7bc 100644 --- a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py @@ -782,7 +782,9 @@ def read_parquet( def read_csv( self, - source_list: str | list[str] | tuple[str], + path: str | list[str] | tuple[str], + /, + *, table_name: str | None = None, **kwargs: Any, ) -> ir.Table: @@ -790,7 +792,7 @@ def read_csv( Parameters ---------- - source_list + path The data source(s). May be a path to a file or directory of CSV files, or an iterable of CSV files. 
table_name @@ -813,7 +815,7 @@ def read_csv( ) inferSchema = kwargs.pop("inferSchema", True) header = kwargs.pop("header", True) - source_list = normalize_filenames(source_list) + source_list = normalize_filenames(path) spark_df = self._session.read.csv( source_list, inferSchema=inferSchema, header=header, **kwargs ) diff --git a/ibis/backends/pyspark/tests/test_import_export.py b/ibis/backends/pyspark/tests/test_import_export.py index bc57ddf8728c..e320f37aab20 100644 --- a/ibis/backends/pyspark/tests/test_import_export.py +++ b/ibis/backends/pyspark/tests/test_import_export.py @@ -13,7 +13,7 @@ "method", [ methodcaller("read_delta", path="test.delta"), - methodcaller("read_csv", source_list="test.csv"), + methodcaller("read_csv", "test.csv"), methodcaller("read_parquet", path="test.parquet"), methodcaller("read_json", source_list="test.json"), ], diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index 4e3926b9b8a5..e859128c17d4 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -866,7 +866,7 @@ def create_table( return self.table(name, database=(catalog, db)) def read_csv( - self, path: str | Path, table_name: str | None = None, **kwargs: Any + self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any ) -> ir.Table: """Register a CSV file as a table in the Snowflake backend. From c1153fbe8684c2d1d7c14216eb9cee82a266cea0 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 14 Jun 2024 15:01:19 -0400 Subject: [PATCH 09/23] refactor(read_parquet): unify read_parquet signatures across backends BREAKING CHANGE: The first argument to `read_parquet` is renamed to `path` and is now a positional-only argument. All other arguments to `read_parquet` are now keyword-only arguments. 
--- ibis/backends/__init__.py | 2 +- ibis/backends/bigquery/__init__.py | 2 +- ibis/backends/clickhouse/__init__.py | 2 ++ ibis/backends/dask/__init__.py | 11 ++++++++--- ibis/backends/datafusion/__init__.py | 2 +- ibis/backends/duckdb/__init__.py | 8 +++++--- ibis/backends/flink/__init__.py | 2 ++ ibis/backends/flink/tests/test_ddl.py | 2 +- ibis/backends/pandas/__init__.py | 11 ++++++++--- ibis/backends/polars/__init__.py | 2 ++ ibis/backends/pyspark/__init__.py | 2 ++ ibis/backends/pyspark/tests/test_import_export.py | 2 +- ibis/backends/snowflake/__init__.py | 2 +- 13 files changed, 35 insertions(+), 15 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index 631e3f3e45a1..8c15c997c25f 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -337,7 +337,7 @@ def to_torch( } def read_parquet( - self, path: str | Path, table_name: str | None = None, **kwargs: Any + self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any ) -> ir.Table: """Register a parquet file as a table in the current backend. diff --git a/ibis/backends/bigquery/__init__.py b/ibis/backends/bigquery/__init__.py index fedafdd4176a..84840e23ccf6 100644 --- a/ibis/backends/bigquery/__init__.py +++ b/ibis/backends/bigquery/__init__.py @@ -246,7 +246,7 @@ def load(file: str) -> None: return self.table(table_name, database=(catalog, database)) def read_parquet( - self, path: str | Path, table_name: str | None = None, **kwargs: Any + self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any ): """Read Parquet data into a BigQuery table. 
diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py index 40e2e09c21bf..70cb8734fcb1 100644 --- a/ibis/backends/clickhouse/__init__.py +++ b/ibis/backends/clickhouse/__init__.py @@ -544,6 +544,8 @@ def truncate_table(self, name: str, database: str | None = None) -> None: def read_parquet( self, path: str | Path, + /, + *, table_name: str | None = None, engine: str = "MergeTree", **kwargs: Any, diff --git a/ibis/backends/dask/__init__.py b/ibis/backends/dask/__init__.py index 9840aba98fe7..2a5efe38ea06 100644 --- a/ibis/backends/dask/__init__.py +++ b/ibis/backends/dask/__init__.py @@ -142,13 +142,18 @@ def read_csv( return self.table(table_name) def read_parquet( - self, source: str | pathlib.Path, table_name: str | None = None, **kwargs: Any + self, + path: str | pathlib.Path, + /, + *, + table_name: str | None = None, + **kwargs: Any, ): """Register a parquet file as a table in the current session. Parameters ---------- - source + path The data source(s). May be a path to a file, an iterable of files, or directory of parquet files. table_name @@ -166,7 +171,7 @@ def read_parquet( """ table_name = table_name or util.gen_name("read_parquet") - df = dd.read_parquet(source, **kwargs) + df = dd.read_parquet(path, **kwargs) self.dictionary[table_name] = df return self.table(table_name) diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index 91a8d720d23c..69072f16c41f 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -454,7 +454,7 @@ def read_csv( return self.table(table_name) def read_parquet( - self, path: str | Path, table_name: str | None = None, **kwargs: Any + self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any ) -> ir.Table: """Register a parquet file as a table in the current database. 
diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 26ecadad7918..2d6a2ccf16b1 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -824,7 +824,9 @@ def read_geo( def read_parquet( self, - source_list: str | Iterable[str], + path: str | Iterable[str], + /, + *, table_name: str | None = None, **kwargs: Any, ) -> ir.Table: @@ -832,7 +834,7 @@ def read_parquet( Parameters ---------- - source_list + path The data source(s). May be a path to a file, an iterable of files, or directory of parquet files. table_name @@ -848,7 +850,7 @@ def read_parquet( The just-registered table """ - source_list = util.normalize_filenames(source_list) + source_list = util.normalize_filenames(path) table_name = table_name or util.gen_name("read_parquet") diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py index 67ecff617f14..cf33fce14aff 100644 --- a/ibis/backends/flink/__init__.py +++ b/ibis/backends/flink/__init__.py @@ -762,6 +762,8 @@ def _read_file( def read_parquet( self, path: str | Path, + /, + *, schema: sch.Schema | None = None, table_name: str | None = None, ) -> ir.Table: diff --git a/ibis/backends/flink/tests/test_ddl.py b/ibis/backends/flink/tests/test_ddl.py index 39591fd448d6..d48d318078f7 100644 --- a/ibis/backends/flink/tests/test_ddl.py +++ b/ibis/backends/flink/tests/test_ddl.py @@ -526,7 +526,7 @@ def test_read_parquet(con, data_dir, tmp_path, table_name, functional_alltypes_s fname = Path("functional_alltypes.parquet") fname = Path(data_dir) / "parquet" / fname.name table = con.read_parquet( - path=tmp_path / fname.name, + tmp_path / fname.name, schema=functional_alltypes_schema, table_name=table_name, ) diff --git a/ibis/backends/pandas/__init__.py b/ibis/backends/pandas/__init__.py index ef14f1f069bb..23509f033d1c 100644 --- a/ibis/backends/pandas/__init__.py +++ b/ibis/backends/pandas/__init__.py @@ -121,13 +121,18 @@ def read_csv( return self.table(table_name) def 
read_parquet( - self, source: str | pathlib.Path, table_name: str | None = None, **kwargs: Any + self, + path: str | pathlib.Path, + /, + *, + table_name: str | None = None, + **kwargs: Any, ): """Register a parquet file as a table in the current session. Parameters ---------- - source + path The data source(s). May be a path to a file, an iterable of files, or directory of parquet files. table_name @@ -145,7 +150,7 @@ def read_parquet( """ table_name = table_name or util.gen_name("read_parquet") - df = pd.read_parquet(source, **kwargs) + df = pd.read_parquet(path, **kwargs) self.dictionary[table_name] = df return self.table(table_name) diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py index 4293b0ae7d8d..6300d50f3425 100644 --- a/ibis/backends/polars/__init__.py +++ b/ibis/backends/polars/__init__.py @@ -307,6 +307,8 @@ def read_pandas( def read_parquet( self, path: str | Path | Iterable[str], + /, + *, table_name: str | None = None, **kwargs: Any, ) -> ir.Table: diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index 47d25ba4e7bc..90e930665c41 100644 --- a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py @@ -746,6 +746,8 @@ def read_delta( def read_parquet( self, path: str | Path, + /, + *, table_name: str | None = None, **kwargs: Any, ) -> ir.Table: diff --git a/ibis/backends/pyspark/tests/test_import_export.py b/ibis/backends/pyspark/tests/test_import_export.py index e320f37aab20..dc6a3c5b6b74 100644 --- a/ibis/backends/pyspark/tests/test_import_export.py +++ b/ibis/backends/pyspark/tests/test_import_export.py @@ -14,7 +14,7 @@ [ methodcaller("read_delta", path="test.delta"), methodcaller("read_csv", "test.csv"), - methodcaller("read_parquet", path="test.parquet"), + methodcaller("read_parquet", "test.parquet"), methodcaller("read_json", source_list="test.json"), ], ) diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index 
e859128c17d4..092e07e87458 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -1084,7 +1084,7 @@ def read_json( return self.table(table) def read_parquet( - self, path: str | Path, table_name: str | None = None, **kwargs: Any + self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any ) -> ir.Table: """Read a Parquet file into an ibis table, using Snowflake. From 2a7edc65c3f118075b21c5787961bec0ef58acca Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 14 Jun 2024 15:04:13 -0400 Subject: [PATCH 10/23] refactor(read_json): unify read_json signatures across backends BREAKING CHANGE: The first argument to `read_json` is renamed to `path` and is now a positional-only argument. All other arguments to `read_json` are now keyword-only arguments. --- ibis/backends/__init__.py | 2 +- ibis/backends/bigquery/__init__.py | 2 +- ibis/backends/duckdb/__init__.py | 10 +++++----- ibis/backends/flink/__init__.py | 2 ++ ibis/backends/flink/tests/test_ddl.py | 2 +- ibis/backends/polars/__init__.py | 2 +- ibis/backends/pyspark/__init__.py | 8 +++++--- ibis/backends/pyspark/tests/test_import_export.py | 2 +- ibis/backends/snowflake/__init__.py | 2 +- 9 files changed, 18 insertions(+), 14 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index 8c15c997c25f..24700ed6f52b 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -388,7 +388,7 @@ def read_csv( ) def read_json( - self, path: str | Path, table_name: str | None = None, **kwargs: Any + self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any ) -> ir.Table: """Register a JSON file as a table in the current backend. 
diff --git a/ibis/backends/bigquery/__init__.py b/ibis/backends/bigquery/__init__.py index 84840e23ccf6..eaadb144aa83 100644 --- a/ibis/backends/bigquery/__init__.py +++ b/ibis/backends/bigquery/__init__.py @@ -303,7 +303,7 @@ def read_csv( return self._read_file(path, table_name=table_name, job_config=job_config) def read_json( - self, path: str | Path, table_name: str | None = None, **kwargs: Any + self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any ) -> ir.Table: """Read newline-delimited JSON data into a BigQuery table. diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 2d6a2ccf16b1..bc561f3a151a 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -591,7 +591,9 @@ def _register_failure(self): @util.experimental def read_json( self, - source_list: str | list[str] | tuple[str], + path: str | list[str] | tuple[str], + /, + *, table_name: str | None = None, **kwargs, ) -> ir.Table: @@ -603,7 +605,7 @@ def read_json( Parameters ---------- - source_list + path File or list of files table_name Optional table name @@ -626,9 +628,7 @@ def read_json( self._create_temp_view( table_name, sg.select(STAR).from_( - self.compiler.f.read_json_auto( - util.normalize_filenames(source_list), *options - ) + self.compiler.f.read_json_auto(util.normalize_filenames(path), *options) ), ) diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py index cf33fce14aff..b0b8ec361794 100644 --- a/ibis/backends/flink/__init__.py +++ b/ibis/backends/flink/__init__.py @@ -822,6 +822,8 @@ def read_csv( def read_json( self, path: str | Path, + /, + *, schema: sch.Schema | None = None, table_name: str | None = None, ) -> ir.Table: diff --git a/ibis/backends/flink/tests/test_ddl.py b/ibis/backends/flink/tests/test_ddl.py index d48d318078f7..9f8f15e68195 100644 --- a/ibis/backends/flink/tests/test_ddl.py +++ b/ibis/backends/flink/tests/test_ddl.py @@ -553,7 +553,7 @@ def 
test_read_json(con, data_dir, tmp_path, table_name, functional_alltypes_sche path = tmp_path / "functional_alltypes.json" df.to_json(path, orient="records", lines=True, date_format="iso") table = con.read_json( - path=path, schema=functional_alltypes_schema, table_name=table_name + path, schema=functional_alltypes_schema, table_name=table_name ) try: diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py index 6300d50f3425..320d598be33b 100644 --- a/ibis/backends/polars/__init__.py +++ b/ibis/backends/polars/__init__.py @@ -209,7 +209,7 @@ def read_csv( return self.table(table_name) def read_json( - self, path: str | Path, table_name: str | None = None, **kwargs: Any + self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any ) -> ir.Table: """Register a JSON file as a table. diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index 90e930665c41..e84e89e9a941 100644 --- a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py @@ -828,7 +828,9 @@ def read_csv( def read_json( self, - source_list: str | Sequence[str], + path: str | Sequence[str], + /, + *, table_name: str | None = None, **kwargs: Any, ) -> ir.Table: @@ -836,7 +838,7 @@ def read_json( Parameters ---------- - source_list + path The data source(s). May be a path to a file or directory of JSON files, or an iterable of JSON files. table_name @@ -857,7 +859,7 @@ def read_json( "Pyspark in streaming mode does not support direction registration of JSON files. " "Please use `read_json_dir` instead." 
) - source_list = normalize_filenames(source_list) + source_list = normalize_filenames(path) spark_df = self._session.read.json(source_list, **kwargs) table_name = table_name or util.gen_name("read_json") diff --git a/ibis/backends/pyspark/tests/test_import_export.py b/ibis/backends/pyspark/tests/test_import_export.py index dc6a3c5b6b74..b92a8e2ba7b8 100644 --- a/ibis/backends/pyspark/tests/test_import_export.py +++ b/ibis/backends/pyspark/tests/test_import_export.py @@ -15,7 +15,7 @@ methodcaller("read_delta", path="test.delta"), methodcaller("read_csv", "test.csv"), methodcaller("read_parquet", "test.parquet"), - methodcaller("read_json", source_list="test.json"), + methodcaller("read_json", "test.json"), ], ) def test_streaming_import_not_implemented(con_streaming, method): diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index 092e07e87458..50983642b7ac 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -995,7 +995,7 @@ def read_csv( return self.table(table) def read_json( - self, path: str | Path, table_name: str | None = None, **kwargs: Any + self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any ) -> ir.Table: """Read newline-delimited JSON into an ibis table, using Snowflake. From 00a68227c82f60b6481adc47978a646382bedfed Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 14 Jun 2024 15:15:02 -0400 Subject: [PATCH 11/23] refactor(read_delta): unify read_delta signatures across backends BREAKING CHANGE: The first argument to `read_delta` is renamed to `path` and is now a positional-only argument. All other arguments to `read_delta` are now keyword-only arguments. 
--- ibis/backends/__init__.py | 4 ++-- ibis/backends/datafusion/__init__.py | 6 +++--- ibis/backends/duckdb/__init__.py | 8 +++++--- ibis/backends/polars/__init__.py | 2 +- ibis/backends/pyspark/__init__.py | 2 ++ ibis/backends/pyspark/tests/test_import_export.py | 2 +- 6 files changed, 14 insertions(+), 10 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index 24700ed6f52b..c5a24504f011 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -413,13 +413,13 @@ def read_json( ) def read_delta( - self, source: str | Path, table_name: str | None = None, **kwargs: Any + self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any ): """Register a Delta Lake table in the current database. Parameters ---------- - source + path The data source. Must be a directory containing a Delta Lake table. table_name diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index 69072f16c41f..bc90da64557e 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -482,13 +482,13 @@ def read_parquet( return self.table(table_name) def read_delta( - self, source_table: str | Path, table_name: str | None = None, **kwargs: Any + self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any ) -> ir.Table: """Register a Delta Lake table as a table in the current database. Parameters ---------- - source_table + path The data source. Must be a directory containing a Delta Lake table. 
table_name @@ -503,7 +503,7 @@ def read_delta( The just-registered table """ - source_table = normalize_filename(source_table) + source_table = normalize_filename(path) table_name = table_name or gen_name("read_delta") diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index bc561f3a151a..4e6f5b6c3bc1 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -936,7 +936,9 @@ def read_in_memory( def read_delta( self, - source_table: str, + path: str, + /, + *, table_name: str | None = None, **kwargs: Any, ) -> ir.Table: @@ -944,7 +946,7 @@ def read_delta( Parameters ---------- - source_table + path The data source. Must be a directory containing a Delta Lake table. table_name @@ -959,7 +961,7 @@ def read_delta( The just-registered table. """ - source_table = util.normalize_filenames(source_table)[0] + source_table = util.normalize_filenames(path)[0] table_name = table_name or util.gen_name("read_delta") diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py index 320d598be33b..0ffe8620dc54 100644 --- a/ibis/backends/polars/__init__.py +++ b/ibis/backends/polars/__init__.py @@ -241,7 +241,7 @@ def read_json( return self.table(table_name) def read_delta( - self, path: str | Path, table_name: str | None = None, **kwargs: Any + self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any ) -> ir.Table: """Register a Delta Lake as a table in the current database. 
diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index e84e89e9a941..685feb40a79c 100644 --- a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py @@ -710,6 +710,8 @@ def _clean_up_cached_table(self, name): def read_delta( self, path: str | Path, + /, + *, table_name: str | None = None, **kwargs: Any, ) -> ir.Table: diff --git a/ibis/backends/pyspark/tests/test_import_export.py b/ibis/backends/pyspark/tests/test_import_export.py index b92a8e2ba7b8..94c47638afc2 100644 --- a/ibis/backends/pyspark/tests/test_import_export.py +++ b/ibis/backends/pyspark/tests/test_import_export.py @@ -12,7 +12,7 @@ @pytest.mark.parametrize( "method", [ - methodcaller("read_delta", path="test.delta"), + methodcaller("read_delta", "test.delta"), methodcaller("read_csv", "test.csv"), methodcaller("read_parquet", "test.parquet"), methodcaller("read_json", "test.json"), From b1f3d5c6c0172fe7476cc9c5ea841d0de615d4b1 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 14 Jun 2024 15:29:00 -0400 Subject: [PATCH 12/23] refactor(*_catalog): unify *_catalog(s) signatures across backends BREAKING CHANGE: The first argument to `create_catalog` is now a positional-only argument. All other arguments to `create_catalog` are now keyword-only arguments. BREAKING CHANGE: The first argument to `drop_catalog` is now a positional-only argument. All other arguments to `drop_catalog` are now keyword-only arguments. 
--- ibis/backends/__init__.py | 6 +++--- ibis/backends/datafusion/__init__.py | 4 ++-- ibis/backends/mssql/__init__.py | 4 ++-- ibis/backends/snowflake/__init__.py | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index c5a24504f011..df285c2a43a7 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -588,7 +588,7 @@ def to_delta( class CanListCatalog(abc.ABC): @abc.abstractmethod - def list_catalogs(self, like: str | None = None) -> list[str]: + def list_catalogs(self, *, like: str | None = None) -> list[str]: """List existing catalogs in the current connection. ::: {.callout-note} @@ -624,7 +624,7 @@ def current_catalog(self) -> str: class CanCreateCatalog(CanListCatalog): @abc.abstractmethod - def create_catalog(self, name: str, force: bool = False) -> None: + def create_catalog(self, name: str, /, *, force: bool = False) -> None: """Create a new catalog. ::: {.callout-note} @@ -648,7 +648,7 @@ def create_catalog(self, name: str, force: bool = False) -> None: """ @abc.abstractmethod - def drop_catalog(self, name: str, force: bool = False) -> None: + def drop_catalog(self, name: str, /, *, force: bool = False) -> None: """Drop a catalog with name `name`. 
::: {.callout-note} diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index bc90da64557e..69c631c31ab4 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -242,13 +242,13 @@ def list_catalogs(self, like: str | None = None) -> list[str]: result = self.con.sql(code).to_pydict() return self._filter_with_like(result["table_catalog"], like) - def create_catalog(self, name: str, force: bool = False) -> None: + def create_catalog(self, name: str, /, *, force: bool = False) -> None: with self._safe_raw_sql( sge.Create(kind="DATABASE", this=sg.to_identifier(name), exists=force) ): pass - def drop_catalog(self, name: str, force: bool = False) -> None: + def drop_catalog(self, name: str, /, *, force: bool = False) -> None: raise com.UnsupportedOperationError( "DataFusion does not support dropping databases" ) diff --git a/ibis/backends/mssql/__init__.py b/ibis/backends/mssql/__init__.py index 6fdfcd109132..938394dfef36 100644 --- a/ibis/backends/mssql/__init__.py +++ b/ibis/backends/mssql/__init__.py @@ -342,7 +342,7 @@ def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: cursor.execute(query, **kwargs) return cursor - def create_catalog(self, name: str, force: bool = False) -> None: + def create_catalog(self, name: str, /, *, force: bool = False) -> None: expr = ( sg.select(STAR) .from_(sg.table("databases", db="sys")) @@ -364,7 +364,7 @@ def create_catalog(self, name: str, force: bool = False) -> None: with self._safe_ddl(create_stmt): pass - def drop_catalog(self, name: str, force: bool = False) -> None: + def drop_catalog(self, name: str, /, *, force: bool = False) -> None: with self._safe_ddl( sge.Drop( kind="DATABASE", diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index 50983642b7ac..d525d8f39dde 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -665,7 +665,7 @@ def 
_register_in_memory_table(self, op: ops.InMemoryTable) -> None: with contextlib.suppress(Exception): shutil.rmtree(tmpdir.name) - def create_catalog(self, name: str, force: bool = False) -> None: + def create_catalog(self, name: str, /, *, force: bool = False) -> None: current_catalog = self.current_catalog current_database = self.current_database quoted = self.compiler.quoted @@ -683,7 +683,7 @@ def create_catalog(self, name: str, force: bool = False) -> None: # so we switch back to the original database and schema cur.execute(use_stmt) - def drop_catalog(self, name: str, force: bool = False) -> None: + def drop_catalog(self, name: str, /, *, force: bool = False) -> None: current_catalog = self.current_catalog if name == current_catalog: raise com.UnsupportedOperationError( From 22492be22f4fae11a15de4e563d703d43f7c3c5b Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 14 Jun 2024 15:30:44 -0400 Subject: [PATCH 13/23] refactor(*_database): unify *_database(s) signatures across backends BREAKING CHANGE: The first argument to `create_database` is now a positional-only argument. All other arguments to `create_database` are now keyword-only arguments. BREAKING CHANGE: The first argument to `drop_database` is now a positional-only argument. All other arguments to `drop_database` are now keyword-only arguments. BREAKING CHANGE: All arguments to `list_databases` are now keyword-only arguments. 
--- ibis/backends/__init__.py | 10 +++++----- ibis/backends/bigquery/__init__.py | 4 ++++ ibis/backends/clickhouse/__init__.py | 4 ++-- ibis/backends/datafusion/__init__.py | 4 ++-- ibis/backends/duckdb/__init__.py | 4 ++-- ibis/backends/exasol/__init__.py | 4 ++-- ibis/backends/flink/__init__.py | 4 +++- ibis/backends/impala/__init__.py | 4 ++-- ibis/backends/mssql/__init__.py | 4 ++-- ibis/backends/mysql/__init__.py | 4 ++-- ibis/backends/postgres/__init__.py | 4 +++- ibis/backends/pyspark/__init__.py | 3 ++- ibis/backends/snowflake/__init__.py | 4 ++-- ibis/backends/trino/__init__.py | 4 ++-- 14 files changed, 35 insertions(+), 26 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index df285c2a43a7..9198d0e70172 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -675,7 +675,7 @@ def drop_catalog(self, name: str, /, *, force: bool = False) -> None: class CanListDatabase(abc.ABC): @abc.abstractmethod def list_databases( - self, like: str | None = None, catalog: str | None = None + self, *, like: str | None = None, catalog: str | None = None ) -> list[str]: """List existing databases in the current connection. @@ -716,7 +716,7 @@ def current_database(self) -> str: class CanCreateDatabase(CanListDatabase): @abc.abstractmethod def create_database( - self, name: str, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> None: """Create a database named `name` in `catalog`. @@ -734,7 +734,7 @@ def create_database( @abc.abstractmethod def drop_database( - self, name: str, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> None: """Drop the database with `name` in `catalog`. 
@@ -778,7 +778,7 @@ class CanCreateSchema(CanListSchema): def create_schema( self, name: str, database: str | None = None, force: bool = False ) -> None: - self.create_database(name=name, catalog=database, force=force) + self.create_database(name, catalog=database, force=force) @util.deprecated( instead="Use `drop_database` instead", as_of="9.0", removed_in="10.0" @@ -786,7 +786,7 @@ def create_schema( def drop_schema( self, name: str, database: str | None = None, force: bool = False ) -> None: - self.drop_database(name=name, catalog=database, force=force) + self.drop_database(name, catalog=database, force=force) class BaseBackend(abc.ABC, _FileIOHandler): diff --git a/ibis/backends/bigquery/__init__.py b/ibis/backends/bigquery/__init__.py index eaadb144aa83..d359b7608cd3 100644 --- a/ibis/backends/bigquery/__init__.py +++ b/ibis/backends/bigquery/__init__.py @@ -533,6 +533,8 @@ def dataset_id(self): def create_database( self, name: str, + /, + *, catalog: str | None = None, force: bool = False, collate: str | None = None, @@ -560,6 +562,8 @@ def create_database( def drop_database( self, name: str, + /, + *, catalog: str | None = None, force: bool = False, cascade: bool = False, diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py index 70cb8734fcb1..3149f7664032 100644 --- a/ibis/backends/clickhouse/__init__.py +++ b/ibis/backends/clickhouse/__init__.py @@ -518,7 +518,7 @@ def _get_schema_using_query(self, query: str) -> sch.Schema: pass def create_database( - self, name: str, *, force: bool = False, engine: str = "Atomic" + self, name: str, /, *, force: bool = False, engine: str = "Atomic" ) -> None: src = sge.Create( this=sg.to_identifier(name), @@ -531,7 +531,7 @@ def create_database( with self._safe_raw_sql(src): pass - def drop_database(self, name: str, *, force: bool = False) -> None: + def drop_database(self, name: str, /, *, force: bool = False) -> None: src = sge.Drop(this=sg.to_identifier(name), kind="DATABASE", 
exists=force) with self._safe_raw_sql(src): pass diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index 69c631c31ab4..548f2069dd74 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -262,7 +262,7 @@ def list_databases( ) def create_database( - self, name: str, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> None: # not actually a table, but this is how sqlglot represents schema names db_name = sg.table(name, db=catalog) @@ -270,7 +270,7 @@ def create_database( pass def drop_database( - self, name: str, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> None: db_name = sg.table(name, db=catalog) with self._safe_raw_sql(sge.Drop(kind="SCHEMA", this=db_name, exists=force)): diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 4e6f5b6c3bc1..6ccd7707a734 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -494,7 +494,7 @@ def load_extension(self, extension: str, force_install: bool = False) -> None: self._load_extensions([extension], force_install=force_install) def create_database( - self, name: str, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> None: if catalog is not None: raise exc.UnsupportedOperationError( @@ -506,7 +506,7 @@ def create_database( pass def drop_database( - self, name: str, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> None: if catalog is not None: raise exc.UnsupportedOperationError( diff --git a/ibis/backends/exasol/__init__.py b/ibis/backends/exasol/__init__.py index 759dfc940d1d..27d3d79d28af 100644 --- a/ibis/backends/exasol/__init__.py +++ b/ibis/backends/exasol/__init__.py @@ -422,7 
+422,7 @@ def current_database(self) -> str: return schema def drop_database( - self, name: str, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> None: if catalog is not None: raise NotImplementedError( @@ -437,7 +437,7 @@ def drop_database( con.execute(drop_schema.sql(dialect=self.dialect)) def create_database( - self, name: str, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> None: if catalog is not None: raise NotImplementedError( diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py index b0b8ec361794..7a7f89746c29 100644 --- a/ibis/backends/flink/__init__.py +++ b/ibis/backends/flink/__init__.py @@ -115,6 +115,8 @@ def current_database(self) -> str: def create_database( self, name: str, + /, + *, db_properties: dict | None = None, catalog: str | None = None, force: bool = False, @@ -140,7 +142,7 @@ def create_database( self.raw_sql(statement.compile()) def drop_database( - self, name: str, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> None: """Drop a database with name `name`. diff --git a/ibis/backends/impala/__init__.py b/ibis/backends/impala/__init__.py index 2baa4cbd7e8f..de9262f2108a 100644 --- a/ibis/backends/impala/__init__.py +++ b/ibis/backends/impala/__init__.py @@ -297,7 +297,7 @@ def current_database(self) -> str: [(db,)] = cur.fetchall() return db - def create_database(self, name, path=None, force=False): + def create_database(self, name, /, *, path=None, force=False): """Create a new Impala database. 
Parameters @@ -313,7 +313,7 @@ def create_database(self, name, path=None, force=False): statement = CreateDatabase(name, path=path, can_exist=force) self._safe_exec_sql(statement) - def drop_database(self, name, force=False): + def drop_database(self, name, /, *, force=False): """Drop an Impala database. Parameters diff --git a/ibis/backends/mssql/__init__.py b/ibis/backends/mssql/__init__.py index 938394dfef36..30db163c99bf 100644 --- a/ibis/backends/mssql/__init__.py +++ b/ibis/backends/mssql/__init__.py @@ -375,7 +375,7 @@ def drop_catalog(self, name: str, /, *, force: bool = False) -> None: pass def create_database( - self, name: str, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> None: current_catalog = self.current_catalog should_switch_catalog = catalog is not None and catalog != current_catalog @@ -419,7 +419,7 @@ def create_database( ) def drop_database( - self, name: str, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> None: current_catalog = self.current_catalog should_switch_catalog = catalog is not None and catalog != current_catalog diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py index 7b279fd1326f..7dae8117eb12 100644 --- a/ibis/backends/mysql/__init__.py +++ b/ibis/backends/mysql/__init__.py @@ -237,14 +237,14 @@ def get_schema( return sch.Schema(fields) - def create_database(self, name: str, force: bool = False) -> None: + def create_database(self, name: str, /, *, force: bool = False) -> None: sql = sge.Create(kind="DATABASE", exist=force, this=sg.to_identifier(name)).sql( self.name ) with self.begin() as cur: cur.execute(sql) - def drop_database(self, name: str, force: bool = False) -> None: + def drop_database(self, name: str, /, *, force: bool = False) -> None: sql = sge.Drop(kind="DATABASE", exist=force, this=sg.to_identifier(name)).sql( self.name ) diff 
--git a/ibis/backends/postgres/__init__.py b/ibis/backends/postgres/__init__.py index 616480757336..a938934c6267 100644 --- a/ibis/backends/postgres/__init__.py +++ b/ibis/backends/postgres/__init__.py @@ -583,7 +583,7 @@ def _get_schema_using_query(self, query: str) -> sch.Schema: pass def create_database( - self, name: str, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> None: if catalog is not None and catalog != self.current_catalog: raise exc.UnsupportedOperationError( @@ -598,6 +598,8 @@ def create_database( def drop_database( self, name: str, + /, + *, catalog: str | None = None, force: bool = False, cascade: bool = False, diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index 685feb40a79c..03586b76d51d 100644 --- a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py @@ -436,6 +436,7 @@ def execute( def create_database( self, name: str, + /, *, catalog: str | None = None, path: str | Path | None = None, @@ -473,7 +474,7 @@ def create_database( pass def drop_database( - self, name: str, *, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> Any: """Drop a Spark database. 
diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index d525d8f39dde..0e5c5421700e 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -698,7 +698,7 @@ def drop_catalog(self, name: str, /, *, force: bool = False) -> None: pass def create_database( - self, name: str, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> None: current_catalog = self.current_catalog current_database = self.current_database @@ -738,7 +738,7 @@ def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: return cur def drop_database( - self, name: str, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> None: if self.current_database == name and ( catalog is None or self.current_catalog == catalog diff --git a/ibis/backends/trino/__init__.py b/ibis/backends/trino/__init__.py index c1cc00aa3988..c7923daa0bee 100644 --- a/ibis/backends/trino/__init__.py +++ b/ibis/backends/trino/__init__.py @@ -362,7 +362,7 @@ def _get_schema_using_query(self, query: str) -> sch.Schema: ) def create_database( - self, name: str, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> None: with self._safe_raw_sql( sge.Create( @@ -374,7 +374,7 @@ def create_database( pass def drop_database( - self, name: str, catalog: str | None = None, force: bool = False + self, name: str, /, *, catalog: str | None = None, force: bool = False ) -> None: with self._safe_raw_sql( sge.Drop( From 8ca2ea1de46f053437b181980387cd34e1f7ee63 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 14 Jun 2024 15:40:52 -0400 Subject: [PATCH 14/23] refactor(table): unify `table` signatures across backends BREAKING CHANGE: The first argument to `table` is now a positional-only argument. 
All other arguments to `table` are now keyword-only arguments. --- ibis/backends/__init__.py | 2 +- ibis/backends/bigquery/__init__.py | 2 +- ibis/backends/duckdb/__init__.py | 2 +- ibis/backends/flink/__init__.py | 4 +++- ibis/backends/impala/__init__.py | 4 +++- ibis/backends/pandas/__init__.py | 2 +- ibis/backends/polars/__init__.py | 2 +- ibis/backends/sql/__init__.py | 2 ++ 8 files changed, 13 insertions(+), 7 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index 9198d0e70172..096122f97afc 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -973,7 +973,7 @@ def list_tables( @abc.abstractmethod def table( - self, name: str, database: tuple[str, str] | str | None = None + self, name: str, /, *, database: tuple[str, str] | str | None = None ) -> ir.Table: """Construct a table expression. diff --git a/ibis/backends/bigquery/__init__.py b/ibis/backends/bigquery/__init__.py index d359b7608cd3..8c59564272dd 100644 --- a/ibis/backends/bigquery/__init__.py +++ b/ibis/backends/bigquery/__init__.py @@ -579,7 +579,7 @@ def drop_database( self.raw_sql(stmt.sql(self.name)) def table( - self, name: str, database: str | None = None, schema: str | None = None + self, name: str, /, *, database: str | None = None, schema: str | None = None ) -> ir.Table: table_loc = self._warn_and_create_table_loc(database, schema) table = sg.parse_one(f"`{name}`", into=sge.Table, read=self.name) diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 6ccd7707a734..ecd85baf12cd 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -248,7 +248,7 @@ def create_table( return self.table(name, database=(catalog, database)) def table( - self, name: str, schema: str | None = None, database: str | None = None + self, name: str, /, *, schema: str | None = None, database: str | None = None ) -> ir.Table: """Construct a table expression. 
diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py index 7a7f89746c29..96e22c05247e 100644 --- a/ibis/backends/flink/__init__.py +++ b/ibis/backends/flink/__init__.py @@ -241,6 +241,8 @@ def list_views( def table( self, name: str, + /, + *, database: str | None = None, catalog: str | None = None, ) -> ir.Table: @@ -672,7 +674,7 @@ def create_view( else: raise exc.IbisError(f"Unsupported `obj` type: {type(obj)}") - return self.table(name=name, database=database, catalog=catalog) + return self.table(name, database=database, catalog=catalog) def drop_view( self, diff --git a/ibis/backends/impala/__init__.py b/ibis/backends/impala/__init__.py index de9262f2108a..2700b724ec3b 100644 --- a/ibis/backends/impala/__init__.py +++ b/ibis/backends/impala/__init__.py @@ -449,7 +449,9 @@ def drop_view(self, name, database=None, force=False): stmt = DropView(name, database=database, must_exist=not force) self._safe_exec_sql(stmt) - def table(self, name: str, database: str | None = None, **kwargs: Any) -> ir.Table: + def table( + self, name: str, /, *, database: str | None = None, **kwargs: Any + ) -> ir.Table: expr = super().table(name, database=database, **kwargs) return ImpalaTable(expr.op()) diff --git a/ibis/backends/pandas/__init__.py b/ibis/backends/pandas/__init__.py index 23509f033d1c..36c9d1fd6a08 100644 --- a/ibis/backends/pandas/__init__.py +++ b/ibis/backends/pandas/__init__.py @@ -175,7 +175,7 @@ def list_tables(self, like=None, database=None): """ return self._filter_with_like(list(self.dictionary.keys()), like) - def table(self, name: str, schema: sch.Schema | None = None): + def table(self, name: str, /, *, schema: sch.Schema | None = None): inferred_schema = self.get_schema(name) overridden_schema = {**inferred_schema, **(schema or {})} return ops.DatabaseTable(name, overridden_schema, self).to_expr() diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py index 0ffe8620dc54..87a527d1ed19 100644 --- 
a/ibis/backends/polars/__init__.py +++ b/ibis/backends/polars/__init__.py @@ -71,7 +71,7 @@ def version(self) -> str: def list_tables(self, like=None, database=None): return self._filter_with_like(list(self._tables.keys()), like) - def table(self, name: str) -> ir.Table: + def table(self, name: str, /, *, _schema: sch.Schema | None = None) -> ir.Table: schema = sch.infer(self._tables[name]) return ops.DatabaseTable(name, schema, self).to_expr() diff --git a/ibis/backends/sql/__init__.py b/ibis/backends/sql/__init__.py index 91965d4b551c..83006b273c77 100644 --- a/ibis/backends/sql/__init__.py +++ b/ibis/backends/sql/__init__.py @@ -109,6 +109,8 @@ def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: def table( self, name: str, + /, + *, schema: str | None = None, database: tuple[str, str] | str | None = None, ) -> ir.Table: From 8f55d48af6ae7aec1dbbbd9fc39e96e373fccfbe Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 14 Jun 2024 16:20:31 -0400 Subject: [PATCH 15/23] refactor(_table): unify `_table(s)` signatures across backends BREAKING CHANGE: all arguments to `list_tables` are now keyword-only arguments. BREAKING CHANGE: The first argument to `create_table` is now a positional-only argument. BREAKING CHANGE: The first argument to `drop_table` is now a positional-only argument. BREAKING CHANGE: The first argument to `truncate_table` is now a positional-only argument. All other arguments are now keyword-only arguments. 
--- ibis/backends/__init__.py | 4 +++- ibis/backends/bigquery/__init__.py | 3 +++ ibis/backends/clickhouse/__init__.py | 3 ++- ibis/backends/datafusion/__init__.py | 3 ++- ibis/backends/druid/__init__.py | 3 ++- ibis/backends/duckdb/__init__.py | 1 + ibis/backends/exasol/__init__.py | 1 + ibis/backends/flink/__init__.py | 6 ++++-- ibis/backends/flink/tests/test_ddl.py | 12 ++++++------ ibis/backends/impala/__init__.py | 5 +++-- ibis/backends/mssql/__init__.py | 1 + ibis/backends/mysql/__init__.py | 1 + ibis/backends/oracle/__init__.py | 8 +++++--- ibis/backends/pandas/__init__.py | 3 ++- ibis/backends/polars/__init__.py | 3 ++- ibis/backends/postgres/__init__.py | 3 +++ ibis/backends/pyspark/__init__.py | 10 +++++++--- ibis/backends/risingwave/__init__.py | 1 + ibis/backends/risingwave/tests/test_streaming.py | 2 +- ibis/backends/snowflake/__init__.py | 1 + ibis/backends/sql/__init__.py | 4 +++- ibis/backends/sqlite/__init__.py | 3 +++ ibis/backends/trino/__init__.py | 1 + 23 files changed, 58 insertions(+), 24 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index 096122f97afc..775913d6600a 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -935,7 +935,7 @@ def _filter_with_like(values: Iterable[str], like: str | None = None) -> list[st @abc.abstractmethod def list_tables( - self, like: str | None = None, database: tuple[str, str] | str | None = None + self, *, like: str | None = None, database: tuple[str, str] | str | None = None ) -> list[str]: """Return the list of table names in the current database. 
@@ -1088,6 +1088,7 @@ def execute(self, expr: ir.Expr) -> Any: def create_table( self, name: str, + /, obj: pd.DataFrame | pa.Table | ir.Table | None = None, *, schema: ibis.Schema | None = None, @@ -1127,6 +1128,7 @@ def create_table( def drop_table( self, name: str, + /, *, database: str | None = None, force: bool = False, diff --git a/ibis/backends/bigquery/__init__.py b/ibis/backends/bigquery/__init__.py index 8c59564272dd..d6e1329628b5 100644 --- a/ibis/backends/bigquery/__init__.py +++ b/ibis/backends/bigquery/__init__.py @@ -870,6 +870,7 @@ def list_databases( def list_tables( self, + *, like: str | None = None, database: tuple[str, str] | str | None = None, schema: str | None = None, @@ -920,6 +921,7 @@ def version(self): def create_table( self, name: str, + /, obj: ir.Table | pd.DataFrame | pa.Table @@ -1073,6 +1075,7 @@ def create_table( def drop_table( self, name: str, + /, *, schema: str | None = None, database: tuple[str | str] | str | None = None, diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py index 3149f7664032..424a81b1fca2 100644 --- a/ibis/backends/clickhouse/__init__.py +++ b/ibis/backends/clickhouse/__init__.py @@ -536,7 +536,7 @@ def drop_database(self, name: str, /, *, force: bool = False) -> None: with self._safe_raw_sql(src): pass - def truncate_table(self, name: str, database: str | None = None) -> None: + def truncate_table(self, name: str, /, *, database: str | None = None) -> None: ident = sg.table(name, db=database).sql(self.name) with self._safe_raw_sql(f"TRUNCATE TABLE {ident}"): pass @@ -598,6 +598,7 @@ def read_csv( def create_table( self, name: str, + /, obj: ir.Table | pd.DataFrame | pa.Table diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index 548f2069dd74..b58de7b708de 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -577,6 +577,7 @@ def execute(self, expr: ir.Expr, **kwargs: Any): def create_table( 
self, name: str, + /, obj: ir.Table | pd.DataFrame | pa.Table @@ -684,7 +685,7 @@ def create_table( return self.table(name, database=database) def truncate_table( - self, name: str, database: str | None = None, schema: str | None = None + self, name: str, /, *, database: str | None = None, schema: str | None = None ) -> None: """Delete all rows from a table. diff --git a/ibis/backends/druid/__init__.py b/ibis/backends/druid/__init__.py index 571b471ead6c..3464048e2ded 100644 --- a/ibis/backends/druid/__init__.py +++ b/ibis/backends/druid/__init__.py @@ -155,6 +155,7 @@ def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: def create_table( self, name: str, + /, obj: pd.DataFrame | pa.Table | ir.Table | None = None, *, schema: sch.Schema | None = None, @@ -164,7 +165,7 @@ def create_table( ) -> ir.Table: raise NotImplementedError() - def drop_table(self, *args, **kwargs): + def drop_table(self, name: str, /, **kwargs): raise NotImplementedError() def list_tables( diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index ecd85baf12cd..48365a4b1129 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -96,6 +96,7 @@ def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: def create_table( self, name: str, + /, obj: ir.Table | pd.DataFrame | pa.Table diff --git a/ibis/backends/exasol/__init__.py b/ibis/backends/exasol/__init__.py index 27d3d79d28af..05ae13cb762a 100644 --- a/ibis/backends/exasol/__init__.py +++ b/ibis/backends/exasol/__init__.py @@ -307,6 +307,7 @@ def _clean_up_tmp_table(self, ident: sge.Identifier) -> None: def create_table( self, name: str, + /, obj: ir.Table | pd.DataFrame | pa.Table diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py index 96e22c05247e..365a0c68a69e 100644 --- a/ibis/backends/flink/__init__.py +++ b/ibis/backends/flink/__init__.py @@ -372,6 +372,7 @@ def execute(self, expr: ir.Expr, **kwargs: Any) -> Any: def 
create_table( self, name: str, + /, obj: pd.DataFrame | pa.Table | ir.Table | None = None, *, schema: sch.Schema | None = None, @@ -450,7 +451,7 @@ def create_table( if overwrite: if self.list_tables(like=name, temp=temp): self.drop_table( - name=name, + name, catalog=catalog, database=database, temp=temp, @@ -522,6 +523,7 @@ def create_table( def drop_table( self, name: str, + /, *, database: str | None = None, catalog: str | None = None, @@ -758,7 +760,7 @@ def _read_file( } return self.create_table( - name=table_name, + table_name, schema=schema, tbl_properties=tbl_properties, ) diff --git a/ibis/backends/flink/tests/test_ddl.py b/ibis/backends/flink/tests/test_ddl.py index 9f8f15e68195..76f26f16ea12 100644 --- a/ibis/backends/flink/tests/test_ddl.py +++ b/ibis/backends/flink/tests/test_ddl.py @@ -186,7 +186,7 @@ def test_recreate_in_mem_table(con, schema, table_name, temp_table, csv_source_c tbl_properties = None new_table = con.create_table( - name=temp_table, + temp_table, obj=employee_df, schema=schema, tbl_properties=tbl_properties, @@ -203,7 +203,7 @@ def test_recreate_in_mem_table(con, schema, table_name, temp_table, csv_source_c match=r"An error occurred while calling o\d+\.createTemporaryView", ): new_table = con.create_table( - name=temp_table, + temp_table, obj=employee_df, schema=schema, tbl_properties=tbl_properties, @@ -229,7 +229,7 @@ def test_force_recreate_in_mem_table(con, schema_props, temp_table, csv_source_c tbl_properties = None new_table = con.create_table( - name=temp_table, + temp_table, obj=employee_df, schema=schema, tbl_properties=tbl_properties, @@ -242,7 +242,7 @@ def test_force_recreate_in_mem_table(con, schema_props, temp_table, csv_source_c # force recreate the same table a second time should succeed new_table = con.create_table( - name=temp_table, + temp_table, obj=employee_df, schema=schema, tbl_properties=tbl_properties, @@ -347,7 +347,7 @@ def test_create_view( con, temp_table, awards_players_schema, csv_source_configs, 
temp_view, temp ): table = con.create_table( - name=temp_table, + temp_table, schema=awards_players_schema, tbl_properties=csv_source_configs("awards_players"), ) @@ -401,7 +401,7 @@ def test_create_view( def test_rename_table(con, awards_players_schema, temp_table, csv_source_configs): table_name = temp_table con.create_table( - name=table_name, + table_name, schema=awards_players_schema, tbl_properties=csv_source_configs("awards_players"), ) diff --git a/ibis/backends/impala/__init__.py b/ibis/backends/impala/__init__.py index 2700b724ec3b..c58d094ce529 100644 --- a/ibis/backends/impala/__init__.py +++ b/ibis/backends/impala/__init__.py @@ -458,6 +458,7 @@ def table( def create_table( self, name: str, + /, obj: ir.Table | pd.DataFrame | pa.Table @@ -772,7 +773,7 @@ def insert( ) def drop_table( - self, name: str, *, database: str | None = None, force: bool = False + self, name: str, /, *, database: str | None = None, force: bool = False ) -> None: """Drop an Impala table. @@ -795,7 +796,7 @@ def drop_table( statement = DropTable(name, database=database, must_exist=not force) self._safe_exec_sql(statement) - def truncate_table(self, name: str, database: str | None = None) -> None: + def truncate_table(self, name: str, /, *, database: str | None = None) -> None: """Delete all rows from an existing table. 
Parameters diff --git a/ibis/backends/mssql/__init__.py b/ibis/backends/mssql/__init__.py index 30db163c99bf..d9de0fd63ae2 100644 --- a/ibis/backends/mssql/__init__.py +++ b/ibis/backends/mssql/__init__.py @@ -521,6 +521,7 @@ def list_databases( def create_table( self, name: str, + /, obj: ir.Table | pd.DataFrame | pa.Table diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py index 7dae8117eb12..74a697cbe2df 100644 --- a/ibis/backends/mysql/__init__.py +++ b/ibis/backends/mysql/__init__.py @@ -381,6 +381,7 @@ def execute( def create_table( self, name: str, + /, obj: ir.Table | pd.DataFrame | pa.Table diff --git a/ibis/backends/oracle/__init__.py b/ibis/backends/oracle/__init__.py index c4d43f280ac8..fe6bd831458d 100644 --- a/ibis/backends/oracle/__init__.py +++ b/ibis/backends/oracle/__init__.py @@ -235,6 +235,7 @@ def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: def list_tables( self, + *, like: str | None = None, schema: str | None = None, database: tuple[str, str] | str | None = None, @@ -367,6 +368,7 @@ def get_schema( def create_table( self, name: str, + /, obj: ir.Table | pd.DataFrame | pa.Table @@ -460,9 +462,7 @@ def create_table( cur.execute(insert_stmt) if overwrite: - self.drop_table( - name=final_table.name, database=final_table.db, force=True - ) + self.drop_table(final_table.name, database=final_table.db, force=True) cur.execute( f"ALTER TABLE IF EXISTS {initial_table.sql(self.name)} RENAME TO {final_table.sql(self.name)}" ) @@ -482,6 +482,8 @@ def create_table( def drop_table( self, name: str, + /, + *, database: tuple[str, str] | str | None = None, force: bool = False, ) -> None: diff --git a/ibis/backends/pandas/__init__.py b/ibis/backends/pandas/__init__.py index 36c9d1fd6a08..504e8c8be3f4 100644 --- a/ibis/backends/pandas/__init__.py +++ b/ibis/backends/pandas/__init__.py @@ -195,6 +195,7 @@ def compile(self, expr, *args, **kwargs): def create_table( self, name: str, + /, obj: pd.DataFrame | pa.Table 
| ir.Table | None = None, *, schema: sch.Schema | None = None, @@ -246,7 +247,7 @@ def create_view( def drop_view(self, name: str, *, force: bool = False) -> None: self.drop_table(name, force=force) - def drop_table(self, name: str, *, force: bool = False) -> None: + def drop_table(self, name: str, /, *, force: bool = False) -> None: try: del self.dictionary[name] except KeyError: diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py index 87a527d1ed19..0bb684eaa5ff 100644 --- a/ibis/backends/polars/__init__.py +++ b/ibis/backends/polars/__init__.py @@ -357,6 +357,7 @@ def read_parquet( def create_table( self, name: str, + /, obj: ir.Table | pd.DataFrame | pa.Table @@ -408,7 +409,7 @@ def create_view( name, obj=obj, temp=None, database=database, overwrite=overwrite ) - def drop_table(self, name: str, *, force: bool = False) -> None: + def drop_table(self, name: str, /, *, force: bool = False) -> None: if name in self._tables: del self._tables[name] elif not force: diff --git a/ibis/backends/postgres/__init__.py b/ibis/backends/postgres/__init__.py index a938934c6267..6288539cf45e 100644 --- a/ibis/backends/postgres/__init__.py +++ b/ibis/backends/postgres/__init__.py @@ -621,6 +621,7 @@ def drop_database( def create_table( self, name: str, + /, obj: ir.Table | pd.DataFrame | pa.Table @@ -727,6 +728,8 @@ def create_table( def drop_table( self, name: str, + /, + *, database: str | None = None, force: bool = False, ) -> None: diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index 03586b76d51d..e07bb953cdc6 100644 --- a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py @@ -532,9 +532,13 @@ def get_schema( def create_table( self, name: str, - obj: ( - ir.Table | pd.DataFrame | pa.Table | pl.DataFrame | pl.LazyFrame | None - ) = None, + /, + obj: ir.Table + | pd.DataFrame + | pa.Table + | pl.DataFrame + | pl.LazyFrame + | None = None, *, schema: sch.Schema | None = None, database: str | 
None = None, diff --git a/ibis/backends/risingwave/__init__.py b/ibis/backends/risingwave/__init__.py index 2270a67dc998..beecdad199ff 100644 --- a/ibis/backends/risingwave/__init__.py +++ b/ibis/backends/risingwave/__init__.py @@ -123,6 +123,7 @@ def do_connect( def create_table( self, name: str, + /, obj: ir.Table | pd.DataFrame | pa.Table diff --git a/ibis/backends/risingwave/tests/test_streaming.py b/ibis/backends/risingwave/tests/test_streaming.py index d3d7bf31717c..a89bb3183e0a 100644 --- a/ibis/backends/risingwave/tests/test_streaming.py +++ b/ibis/backends/risingwave/tests/test_streaming.py @@ -68,7 +68,7 @@ def test_mv_on_table_with_connector(con): "datagen.split.num": "1", } tblc = con.create_table( - name=tblc_name, + tblc_name, obj=None, schema=schema, connector_properties=connector_properties, diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index 0e5c5421700e..0f0bd346b1e4 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -758,6 +758,7 @@ def drop_database( def create_table( self, name: str, + /, obj: ir.Table | pd.DataFrame | pa.Table diff --git a/ibis/backends/sql/__init__.py b/ibis/backends/sql/__init__.py index 83006b273c77..7e9299aa117d 100644 --- a/ibis/backends/sql/__init__.py +++ b/ibis/backends/sql/__init__.py @@ -286,6 +286,8 @@ def execute( def drop_table( self, name: str, + /, + *, database: tuple[str, str] | str | None = None, force: bool = False, ) -> None: @@ -494,7 +496,7 @@ def _build_insert_template( ).sql(self.dialect) def truncate_table( - self, name: str, database: str | None = None, schema: str | None = None + self, name: str, /, *, database: str | None = None, schema: str | None = None ) -> None: """Delete all rows from a table. 
diff --git a/ibis/backends/sqlite/__init__.py b/ibis/backends/sqlite/__init__.py index e770cecd72be..63ef0ed6f3d5 100644 --- a/ibis/backends/sqlite/__init__.py +++ b/ibis/backends/sqlite/__init__.py @@ -429,6 +429,7 @@ def attach(self, name: str, path: str | Path) -> None: def create_table( self, name: str, + /, obj: ir.Table | pd.DataFrame | pa.Table @@ -543,6 +544,8 @@ def create_table( def drop_table( self, name: str, + /, + *, database: str | None = None, force: bool = False, ) -> None: diff --git a/ibis/backends/trino/__init__.py b/ibis/backends/trino/__init__.py index c7923daa0bee..d7797bdb32f1 100644 --- a/ibis/backends/trino/__init__.py +++ b/ibis/backends/trino/__init__.py @@ -388,6 +388,7 @@ def drop_database( def create_table( self, name: str, + /, obj: ir.Table | pd.DataFrame | pa.Table From 89203b6407c1f3915c689d2f83a0a7aba056be3d Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 14 Jun 2024 16:27:04 -0400 Subject: [PATCH 16/23] refactor(compile): unify compile signatures across backends BREAKING CHANGE: The first argument to `compile` is now a positional-only argument. All other arguments to `compile` are now keyword-only arguments. 
--- ibis/backends/__init__.py | 2 ++ ibis/backends/bigquery/__init__.py | 2 ++ ibis/backends/dask/__init__.py | 2 ++ ibis/backends/flink/__init__.py | 2 ++ ibis/backends/pandas/__init__.py | 2 +- ibis/backends/polars/__init__.py | 7 ++++++- ibis/backends/sql/__init__.py | 2 ++ 7 files changed, 17 insertions(+), 2 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index 775913d6600a..d194c414b2b6 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -1076,6 +1076,8 @@ def _run_pre_execute_hooks(self, expr: ir.Expr) -> None: def compile( self, expr: ir.Expr, + /, + *, params: Mapping[ir.Expr, Any] | None = None, ) -> Any: """Compile an expression.""" diff --git a/ibis/backends/bigquery/__init__.py b/ibis/backends/bigquery/__init__.py index d6e1329628b5..17971b58de01 100644 --- a/ibis/backends/bigquery/__init__.py +++ b/ibis/backends/bigquery/__init__.py @@ -690,6 +690,8 @@ def current_database(self) -> str | None: def compile( self, expr: ir.Expr, + /, + *, limit: str | None = None, params=None, pretty: bool = True, diff --git a/ibis/backends/dask/__init__.py b/ibis/backends/dask/__init__.py index 2a5efe38ea06..060686ce01a3 100644 --- a/ibis/backends/dask/__init__.py +++ b/ibis/backends/dask/__init__.py @@ -80,6 +80,8 @@ def _validate_args(self, expr, limit): def compile( self, expr: ir.Expr, + /, + *, params: dict | None = None, limit: int | None = None, **kwargs, diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py index 365a0c68a69e..f7d963cca9d9 100644 --- a/ibis/backends/flink/__init__.py +++ b/ibis/backends/flink/__init__.py @@ -350,6 +350,8 @@ def _register_udf(self, udf_node: ops.ScalarUDF): def compile( self, expr: ir.Expr, + /, + *, params: Mapping[ir.Expr, Any] | None = None, pretty: bool = False, **_: Any, diff --git a/ibis/backends/pandas/__init__.py b/ibis/backends/pandas/__init__.py index 504e8c8be3f4..0a56eb6f5732 100644 --- a/ibis/backends/pandas/__init__.py +++ 
b/ibis/backends/pandas/__init__.py @@ -189,7 +189,7 @@ def get_schema(self, table_name, *, database=None): return schema - def compile(self, expr, *args, **kwargs): + def compile(self, expr, /, **kwargs): return expr def create_table( diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py index 0bb684eaa5ff..75904920ba81 100644 --- a/ibis/backends/polars/__init__.py +++ b/ibis/backends/polars/__init__.py @@ -439,7 +439,12 @@ def has_operation(cls, operation: type[ops.Value]) -> bool: return operation in op_classes or issubclass(operation, op_classes) def compile( - self, expr: ir.Expr, params: Mapping[ir.Expr, object] | None = None, **_: Any + self, + expr: ir.Expr, + /, + *, + params: Mapping[ir.Expr, object] | None = None, + **_: Any, ): if params is None: params = dict() diff --git a/ibis/backends/sql/__init__.py b/ibis/backends/sql/__init__.py index 7e9299aa117d..24096456c78f 100644 --- a/ibis/backends/sql/__init__.py +++ b/ibis/backends/sql/__init__.py @@ -147,6 +147,8 @@ def table( def compile( self, expr: ir.Expr, + /, + *, limit: str | None = None, params: Mapping[ir.Expr, Any] | None = None, pretty: bool = False, From d6f98c56a969efd93a6d7ad02bd676db253aebe7 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 14 Jun 2024 16:32:11 -0400 Subject: [PATCH 17/23] refactor(execute): unify execute signatures across backends BREAKING CHANGE: The first argument to `execute` is now a positional-only argument. All other arguments to `execute` are now keyword-only arguments. 
--- ibis/backends/__init__.py | 2 +- ibis/backends/bigquery/__init__.py | 2 +- ibis/backends/clickhouse/__init__.py | 2 ++ ibis/backends/dask/__init__.py | 2 ++ ibis/backends/datafusion/__init__.py | 2 +- ibis/backends/duckdb/__init__.py | 2 ++ ibis/backends/flink/__init__.py | 2 +- ibis/backends/mysql/__init__.py | 2 +- ibis/backends/pandas/__init__.py | 8 ++++---- ibis/backends/polars/__init__.py | 2 ++ ibis/backends/pyspark/__init__.py | 2 ++ ibis/backends/sql/__init__.py | 2 ++ ibis/backends/tests/test_param.py | 2 +- 13 files changed, 22 insertions(+), 10 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index d194c414b2b6..12136190a414 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -1083,7 +1083,7 @@ def compile( """Compile an expression.""" return self.compiler.to_sql(expr, params=params) - def execute(self, expr: ir.Expr) -> Any: + def execute(self, expr: ir.Expr, /) -> Any: """Execute an expression.""" @abc.abstractmethod diff --git a/ibis/backends/bigquery/__init__.py b/ibis/backends/bigquery/__init__.py index 17971b58de01..0b0855e464ae 100644 --- a/ibis/backends/bigquery/__init__.py +++ b/ibis/backends/bigquery/__init__.py @@ -712,7 +712,7 @@ def compile( self._log(sql) return sql - def execute(self, expr, params=None, limit="default", **kwargs): + def execute(self, expr, /, *, params=None, limit="default", **kwargs): """Compile and execute the given Ibis expression. 
Compile and execute Ibis expression using this backend client diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py index 424a81b1fca2..356ffd5058bf 100644 --- a/ibis/backends/clickhouse/__init__.py +++ b/ibis/backends/clickhouse/__init__.py @@ -379,6 +379,8 @@ def batcher(sql: str, *, schema: pa.Schema) -> Iterator[pa.RecordBatch]: def execute( self, expr: ir.Expr, + /, + *, limit: str | None = "default", external_tables: Mapping[str, pd.DataFrame] | None = None, **kwargs: Any, diff --git a/ibis/backends/dask/__init__.py b/ibis/backends/dask/__init__.py index 060686ce01a3..b3a869b14dee 100644 --- a/ibis/backends/dask/__init__.py +++ b/ibis/backends/dask/__init__.py @@ -97,6 +97,8 @@ def compile( def execute( self, expr: ir.Expr, + /, + *, params: Mapping[ir.Expr, object] | None = None, limit: str = "default", **kwargs, diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index b58de7b708de..4a39aaf2f17b 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -568,7 +568,7 @@ def to_pyarrow(self, expr: ir.Expr, **kwargs: Any) -> pa.Table: arrow_table = batch_reader.read_all() return expr.__pyarrow_result__(arrow_table) - def execute(self, expr: ir.Expr, **kwargs: Any): + def execute(self, expr: ir.Expr, /, **kwargs: Any): batch_reader = self.to_pyarrow_batches(expr, **kwargs) return expr.__pandas_result__( batch_reader.read_pandas(timestamp_as_object=True) diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 48365a4b1129..b7927955f575 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -1423,6 +1423,8 @@ def to_pyarrow( def execute( self, expr: ir.Expr, + /, + *, params: Mapping | None = None, limit: str | None = "default", **_: Any, diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py index f7d963cca9d9..e0a82c6aead3 100644 --- a/ibis/backends/flink/__init__.py 
+++ b/ibis/backends/flink/__init__.py @@ -361,7 +361,7 @@ def compile( expr, params=params, pretty=pretty ) # Discard `limit` and other kwargs. - def execute(self, expr: ir.Expr, **kwargs: Any) -> Any: + def execute(self, expr: ir.Expr, /, **kwargs: Any) -> Any: """Execute an expression.""" self._register_udfs(expr) diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py index 74a697cbe2df..dd01de3bea18 100644 --- a/ibis/backends/mysql/__init__.py +++ b/ibis/backends/mysql/__init__.py @@ -364,7 +364,7 @@ def list_tables( return self._filter_with_like(map(itemgetter(0), out), like) def execute( - self, expr: ir.Expr, limit: str | None = "default", **kwargs: Any + self, expr: ir.Expr, /, *, limit: str | None = "default", **kwargs: Any ) -> Any: """Execute an expression.""" diff --git a/ibis/backends/pandas/__init__.py b/ibis/backends/pandas/__init__.py index 0a56eb6f5732..1f1cf0d24ec7 100644 --- a/ibis/backends/pandas/__init__.py +++ b/ibis/backends/pandas/__init__.py @@ -313,7 +313,7 @@ def to_pyarrow_batches( class Backend(BasePandasBackend): name = "pandas" - def execute(self, query, params=None, limit="default", **kwargs): + def execute(self, expr, /, *, params=None, limit="default", **kwargs): from ibis.backends.pandas.executor import PandasExecutor if limit != "default" and limit is not None: @@ -322,15 +322,15 @@ def execute(self, query, params=None, limit="default", **kwargs): "pandas backend" ) - if not isinstance(query, ir.Expr): + if not isinstance(expr, ir.Expr): raise TypeError( - f"`query` has type {type(query).__name__!r}, expected ibis.expr.types.Expr" + f"`expr` has type {type(expr).__name__!r}, expected ibis.expr.types.Expr" ) params = params or {} params = {k.op() if isinstance(k, ir.Expr) else k: v for k, v in params.items()} - return PandasExecutor.execute(query.op(), backend=self, params=params) + return PandasExecutor.execute(expr.op(), backend=self, params=params) def _load_into_cache(self, name, expr): 
self.create_table(name, expr.execute()) diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py index 75904920ba81..5a528c5c4acc 100644 --- a/ibis/backends/polars/__init__.py +++ b/ibis/backends/polars/__init__.py @@ -489,6 +489,8 @@ def _to_dataframe( def execute( self, expr: ir.Expr, + /, + *, params: Mapping[ir.Expr, object] | None = None, limit: int | None = None, streaming: bool = False, diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index e07bb953cdc6..b368c10a95de 100644 --- a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py @@ -416,6 +416,8 @@ def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: def execute( self, expr: ir.Expr, + /, + *, params: Mapping | None = None, limit: str | None = "default", **kwargs: Any, diff --git a/ibis/backends/sql/__init__.py b/ibis/backends/sql/__init__.py index 24096456c78f..e04120a455f9 100644 --- a/ibis/backends/sql/__init__.py +++ b/ibis/backends/sql/__init__.py @@ -267,6 +267,8 @@ def _clean_up_cached_table(self, name): def execute( self, expr: ir.Expr, + /, + *, params: Mapping | None = None, limit: str | None = "default", **kwargs: Any, diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index d9ea67601bb4..fad0a90e0170 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -207,5 +207,5 @@ def test_scalar_param_date(backend, alltypes, value): def test_scalar_param_nested(con): param = ibis.param("struct>>>") value = OrderedDict([("x", [OrderedDict([("y", [1.0, 2.0, 3.0])])])]) - result = con.execute(param, {param: value}) + result = con.execute(param, params={param: value}) assert pytest.approx(result["x"][0]["y"]) == np.array([1.0, 2.0, 3.0]) From e9620b875c5aeba130f6f44e3070f273dcbd80e4 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 14 Jun 2024 16:38:47 -0400 Subject: [PATCH 18/23] refactor(insert): unify insert signatures across backends 
BREAKING CHANGE: The first argument to `insert` is now a positional-only argument. The second argument, `obj`, is keyword or positional. All other arguments to `insert` are now keyword-only arguments. --- ibis/backends/bigquery/__init__.py | 2 ++ ibis/backends/clickhouse/__init__.py | 12 +++++++----- ibis/backends/flink/__init__.py | 2 ++ ibis/backends/impala/__init__.py | 4 +++- ibis/backends/snowflake/__init__.py | 2 ++ ibis/backends/sql/__init__.py | 2 ++ ibis/backends/sqlite/__init__.py | 2 ++ 7 files changed, 20 insertions(+), 6 deletions(-) diff --git a/ibis/backends/bigquery/__init__.py b/ibis/backends/bigquery/__init__.py index 0b0855e464ae..2414a87f4957 100644 --- a/ibis/backends/bigquery/__init__.py +++ b/ibis/backends/bigquery/__init__.py @@ -759,7 +759,9 @@ def execute(self, expr, /, *, params=None, limit="default", **kwargs): def insert( self, table_name: str, + /, obj: pd.DataFrame | ir.Table | list | dict, + *, schema: str | None = None, database: str | None = None, overwrite: bool = False, diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py index 356ffd5058bf..e056f24cfdae 100644 --- a/ibis/backends/clickhouse/__init__.py +++ b/ibis/backends/clickhouse/__init__.py @@ -412,8 +412,10 @@ def execute( def insert( self, - name: str, + table_name: str, + /, obj: pd.DataFrame | ir.Table, + *, settings: Mapping[str, Any] | None = None, overwrite: bool = False, **kwargs: Any, @@ -422,18 +424,18 @@ def insert( import pyarrow as pa if overwrite: - self.truncate_table(name) + self.truncate_table(table_name) if isinstance(obj, pa.Table): - return self.con.insert_arrow(name, obj, settings=settings, **kwargs) + return self.con.insert_arrow(table_name, obj, settings=settings, **kwargs) elif isinstance(obj, pd.DataFrame): return self.con.insert_arrow( - name, pa.Table.from_pandas(obj), settings=settings, **kwargs + table_name, pa.Table.from_pandas(obj), settings=settings, **kwargs ) elif not isinstance(obj, ir.Table): obj = 
ibis.memtable(obj) - query = self._build_insert_from_table(target=name, source=obj) + query = self._build_insert_from_table(target=table_name, source=obj) external_tables = self._collect_in_memory_tables(obj, {}) external_data = self._normalize_external_tables(external_tables) return self.con.command(query.sql(self.name), external_data=external_data) diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py index e0a82c6aead3..3a4d5bee83a2 100644 --- a/ibis/backends/flink/__init__.py +++ b/ibis/backends/flink/__init__.py @@ -860,7 +860,9 @@ def read_json( def insert( self, table_name: str, + /, obj: pa.Table | pd.DataFrame | ir.Table | list | dict, + *, database: str | None = None, catalog: str | None = None, overwrite: bool = False, diff --git a/ibis/backends/impala/__init__.py b/ibis/backends/impala/__init__.py index c58d094ce529..5902caf6e761 100644 --- a/ibis/backends/impala/__init__.py +++ b/ibis/backends/impala/__init__.py @@ -739,7 +739,9 @@ def _wrap_new_table(self, name, database): def insert( self, table_name, - obj=None, + /, + obj, + *, database=None, overwrite=False, partition=None, diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index 0f0bd346b1e4..fa578d5cbedd 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -1179,7 +1179,9 @@ def read_parquet( def insert( self, table_name: str, + /, obj: pd.DataFrame | ir.Table | list | dict, + *, schema: str | None = None, database: str | None = None, overwrite: bool = False, diff --git a/ibis/backends/sql/__init__.py b/ibis/backends/sql/__init__.py index e04120a455f9..6ddb7803fb29 100644 --- a/ibis/backends/sql/__init__.py +++ b/ibis/backends/sql/__init__.py @@ -371,7 +371,9 @@ def to_pyarrow_batches( def insert( self, table_name: str, + /, obj: pd.DataFrame | ir.Table | list | dict, + *, schema: str | None = None, database: str | None = None, overwrite: bool = False, diff --git 
a/ibis/backends/sqlite/__init__.py b/ibis/backends/sqlite/__init__.py index 63ef0ed6f3d5..b0b58c4b3b63 100644 --- a/ibis/backends/sqlite/__init__.py +++ b/ibis/backends/sqlite/__init__.py @@ -592,7 +592,9 @@ def create_view( def insert( self, table_name: str, + /, obj: pd.DataFrame | ir.Table | list | dict, + *, database: str | None = None, overwrite: bool = False, ) -> None: From 815d1238bb80328076d01fd50aa80b17ebf455fb Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 14 Jun 2024 16:44:05 -0400 Subject: [PATCH 19/23] chore(sigcheck): skip signature checks for do_connect We can revisit this, but for now, these can be different enough that I'm ok not enforcing strict compatibility. --- ibis/backends/tests/test_signatures.py | 58 +++++++++++++------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/ibis/backends/tests/test_signatures.py b/ibis/backends/tests/test_signatures.py index 8eecfa69f8db..7e3a3bd2882c 100644 --- a/ibis/backends/tests/test_signatures.py +++ b/ibis/backends/tests/test_signatures.py @@ -14,18 +14,31 @@ from ibis.backends.sql import SQLBackend from ibis.backends.tests.signature.typecheck import compatible -params = [] - -for module in [ - BaseBackend, - CanCreateCatalog, - CanCreateDatabase, - CanListCatalog, - CanListDatabase, -]: - methods = list(filter(lambda x: not x.startswith("_"), dir(module))) - for method in methods: - params.append((module, method)) +SKIP_METHODS = ["do_connect"] + + +def _scrape_methods(modules): + params = [] + for module in modules: + methods = list(filter(lambda x: not x.startswith("_"), dir(module))) + for method in methods: + # Only test methods that are callable (so we can grab the signature) + # and skip any methods that we don't want to check + if method not in SKIP_METHODS and callable(getattr(module, method)): + params.append((module, method)) + + return params + + +params = _scrape_methods( + [ + BaseBackend, + CanCreateCatalog, + CanCreateDatabase, + CanListCatalog, + 
CanListDatabase, + ] +) @pytest.mark.parametrize("base_cls, method", params) @@ -33,24 +46,14 @@ def test_signatures(base_cls, method, backend_cls): if not hasattr(backend_cls, method): pytest.skip(f"Method {method} not present in {backend_cls}, skipping...") - if not callable(base_method := getattr(base_cls, method)): - pytest.skip( - f"Method {method} in {base_cls} isn't callable, can't grab signature" - ) - - base_sig = inspect.signature(base_method) + base_sig = inspect.signature(getattr(base_cls, method)) backend_sig = inspect.signature(getattr(backend_cls, method)) # Usage is compatible(implementation_signature, defined_interface_signature, ...) assert compatible(backend_sig, base_sig, check_annotations=False) -sql_backend_params = [] - -for module in [SQLBackend]: - methods = list(filter(lambda x: not x.startswith("_"), dir(module))) - for method in methods: - sql_backend_params.append((module, method)) +sql_backend_params = _scrape_methods([SQLBackend]) @pytest.mark.parametrize("base_cls, method", sql_backend_params) @@ -58,12 +61,7 @@ def test_signatures_sql_backends(base_cls, method, backend_sql_cls): if not hasattr(backend_sql_cls, method): pytest.skip(f"Method {method} not present in {backend_sql_cls}, skipping...") - if not callable(base_method := getattr(base_cls, method)): - pytest.skip( - f"Method {method} in {base_cls} isn't callable, can't grab signature" - ) - - base_sig = inspect.signature(base_method) + base_sig = inspect.signature(getattr(base_cls, method)) backend_sig = inspect.signature(getattr(backend_sql_cls, method)) # Usage is compatible(implementation_signature, defined_interface_signature, ...) From 5279875080bcec399a6d390551162ee2a19e47ea Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 14 Jun 2024 17:12:02 -0400 Subject: [PATCH 20/23] refactor(views): unify _view signatures across backends BREAKING CHANGES: The first argument to `create_view` is now a positional-only argument. 
The second argument, `obj`, is keyword or positional. All other arguments to `create_view` are now keyword-only arguments. The first argument to `drop_view` is now a positional-only argument. All other arguments to `drop_view` are now keyword-only arguments. --- ibis/backends/__init__.py | 3 ++- ibis/backends/bigquery/__init__.py | 2 ++ ibis/backends/clickhouse/__init__.py | 1 + ibis/backends/flink/__init__.py | 6 ++++-- ibis/backends/flink/tests/test_ddl.py | 10 +++++----- ibis/backends/impala/__init__.py | 3 ++- ibis/backends/pandas/__init__.py | 3 ++- ibis/backends/polars/__init__.py | 3 ++- ibis/backends/pyspark/__init__.py | 1 + ibis/backends/sql/__init__.py | 2 ++ ibis/backends/sqlite/__init__.py | 1 + 11 files changed, 24 insertions(+), 11 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index 12136190a414..1d2be9876a4a 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -1170,6 +1170,7 @@ def rename_table(self, old_name: str, new_name: str) -> None: def create_view( self, name: str, + /, obj: ir.Table, *, database: str | None = None, @@ -1198,7 +1199,7 @@ def create_view( @abc.abstractmethod def drop_view( - self, name: str, *, database: str | None = None, force: bool = False + self, name: str, /, *, database: str | None = None, force: bool = False ) -> None: """Drop a view. 
diff --git a/ibis/backends/bigquery/__init__.py b/ibis/backends/bigquery/__init__.py index 2414a87f4957..27af36a3439a 100644 --- a/ibis/backends/bigquery/__init__.py +++ b/ibis/backends/bigquery/__init__.py @@ -1101,6 +1101,7 @@ def drop_table( def create_view( self, name: str, + /, obj: ir.Table, *, schema: str | None = None, @@ -1127,6 +1128,7 @@ def create_view( def drop_view( self, name: str, + /, *, schema: str | None = None, database: str | None = None, diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py index e056f24cfdae..ccffd581d8fa 100644 --- a/ibis/backends/clickhouse/__init__.py +++ b/ibis/backends/clickhouse/__init__.py @@ -763,6 +763,7 @@ def create_table( def create_view( self, name: str, + /, obj: ir.Table, *, database: str | None = None, diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py index 3a4d5bee83a2..ccc2fa5c1729 100644 --- a/ibis/backends/flink/__init__.py +++ b/ibis/backends/flink/__init__.py @@ -485,7 +485,7 @@ def create_table( # which may not be ideal. We plan to get back to this later. 
# Ref: https://github.com/ibis-project/ibis/pull/7479#discussion_r1416237088 return self.create_view( - name=name, + name, obj=dataframe, schema=schema, database=database, @@ -586,6 +586,7 @@ def rename_table( def create_view( self, name: str, + /, obj: pd.DataFrame | ir.Table, *, schema: sch.Schema | None = None, @@ -639,7 +640,7 @@ def create_view( if overwrite and self.list_views(like=name, temp=temp): self.drop_view( - name=name, + name, database=database, catalog=catalog, temp=temp, @@ -683,6 +684,7 @@ def create_view( def drop_view( self, name: str, + /, *, database: str | None = None, catalog: str | None = None, diff --git a/ibis/backends/flink/tests/test_ddl.py b/ibis/backends/flink/tests/test_ddl.py index 76f26f16ea12..499fe4585104 100644 --- a/ibis/backends/flink/tests/test_ddl.py +++ b/ibis/backends/flink/tests/test_ddl.py @@ -354,7 +354,7 @@ def test_create_view( assert temp_table in con.list_tables() con.create_view( - name=temp_view, + temp_view, obj=table, force=False, temp=temp, @@ -366,7 +366,7 @@ def test_create_view( # Try to re-create the same view with `force=False` with pytest.raises(Py4JJavaError): con.create_view( - name=temp_view, + temp_view, obj=table, force=False, temp=temp, @@ -376,7 +376,7 @@ def test_create_view( # Try to re-create the same view with `force=True` con.create_view( - name=temp_view, + temp_view, obj=table, force=True, temp=temp, @@ -386,7 +386,7 @@ def test_create_view( # Overwrite the view con.create_view( - name=temp_view, + temp_view, obj=table, force=False, temp=temp, @@ -394,7 +394,7 @@ def test_create_view( ) assert view_list == sorted(con.list_tables()) - con.drop_view(name=temp_view, temp=temp, force=True) + con.drop_view(temp_view, temp=temp, force=True) assert temp_view not in con.list_tables() diff --git a/ibis/backends/impala/__init__.py b/ibis/backends/impala/__init__.py index 5902caf6e761..7d3c0c69c4c3 100644 --- a/ibis/backends/impala/__init__.py +++ b/ibis/backends/impala/__init__.py @@ -435,6 +435,7 @@ 
def set_compression_codec(self, codec): def create_view( self, name: str, + /, obj: ir.Table, *, database: str | None = None, @@ -445,7 +446,7 @@ def create_view( self._safe_exec_sql(statement) return self.table(name, database=database) - def drop_view(self, name, database=None, force=False): + def drop_view(self, name, /, *, database=None, force=False): stmt = DropView(name, database=database, must_exist=not force) self._safe_exec_sql(stmt) diff --git a/ibis/backends/pandas/__init__.py b/ibis/backends/pandas/__init__.py index 1f1cf0d24ec7..0c731016c56e 100644 --- a/ibis/backends/pandas/__init__.py +++ b/ibis/backends/pandas/__init__.py @@ -235,6 +235,7 @@ def create_table( def create_view( self, name: str, + /, obj: ir.Table, *, database: str | None = None, @@ -244,7 +245,7 @@ def create_view( name, obj=obj, temp=None, database=database, overwrite=overwrite ) - def drop_view(self, name: str, *, force: bool = False) -> None: + def drop_view(self, name: str, /, *, force: bool = False) -> None: self.drop_table(name, force=force) def drop_table(self, name: str, /, *, force: bool = False) -> None: diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py index 5a528c5c4acc..49c5e29f9fa3 100644 --- a/ibis/backends/polars/__init__.py +++ b/ibis/backends/polars/__init__.py @@ -400,6 +400,7 @@ def create_table( def create_view( self, name: str, + /, obj: ir.Table, *, database: str | None = None, @@ -415,7 +416,7 @@ def drop_table(self, name: str, /, *, force: bool = False) -> None: elif not force: raise com.IbisError(f"Table {name!r} does not exist") - def drop_view(self, name: str, *, force: bool = False) -> None: + def drop_view(self, name: str, /, *, force: bool = False) -> None: self.drop_table(name, force=force) def get_schema(self, table_name): diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index b368c10a95de..08db71aa6ace 100644 --- a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py 
@@ -619,6 +619,7 @@ def create_table( def create_view( self, name: str, + /, obj: ir.Table, *, database: str | None = None, diff --git a/ibis/backends/sql/__init__.py b/ibis/backends/sql/__init__.py index 6ddb7803fb29..c703da0673bb 100644 --- a/ibis/backends/sql/__init__.py +++ b/ibis/backends/sql/__init__.py @@ -219,6 +219,7 @@ def _register_udfs(self, expr: ir.Expr) -> None: def create_view( self, name: str, + /, obj: ir.Table, *, database: str | None = None, @@ -242,6 +243,7 @@ def create_view( def drop_view( self, name: str, + /, *, database: str | None = None, schema: str | None = None, diff --git a/ibis/backends/sqlite/__init__.py b/ibis/backends/sqlite/__init__.py index b0b58c4b3b63..d57f6b04d98e 100644 --- a/ibis/backends/sqlite/__init__.py +++ b/ibis/backends/sqlite/__init__.py @@ -560,6 +560,7 @@ def drop_table( def create_view( self, name: str, + /, obj: ir.Table, *, database: str | None = None, From c5c0fb8364f48b6433782d0bcc1badca9b4e9284 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Mon, 17 Jun 2024 15:28:57 -0400 Subject: [PATCH 21/23] chore(clickhouse): remove XPASSing xfail --- ibis/backends/tests/test_param.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index fad0a90e0170..7545f1a3b618 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -195,7 +195,6 @@ def test_scalar_param_date(backend, alltypes, value): "postgres", "risingwave", "datafusion", - "clickhouse", "sqlite", "impala", "oracle", From a574c920b8ed4a89a1cdb6d7b15b4d15185fdce1 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Mon, 19 Aug 2024 16:51:34 -0400 Subject: [PATCH 22/23] refactor(to_parquet_dir): use `directory` as common arg name --- ibis/backends/__init__.py | 3 ++- ibis/backends/pyspark/__init__.py | 11 +++++++---- ibis/backends/pyspark/tests/test_import_export.py | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/ibis/backends/__init__.py 
b/ibis/backends/__init__.py index 1d2be9876a4a..c0f0b0069814 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -493,7 +493,8 @@ def to_parquet_dir( expr The ibis expression to execute and persist to parquet. directory - The data source. A string or Path to the directory where the parquet file will be written. + A string or Path to the directory where the parquet file will be + written. params Mapping of scalar parameter expressions to value. **kwargs diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index 08db71aa6ace..e5f80eb43b4d 100644 --- a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py @@ -1314,7 +1314,8 @@ def _to_filesystem_output( def to_parquet_dir( self, expr: ir.Expr, - path: str | Path, + directory: str | Path, + *, params: Mapping[ir.Scalar, Any] | None = None, limit: int | str | None = None, options: Mapping[str, str] | None = None, @@ -1325,8 +1326,8 @@ def to_parquet_dir( ---------- expr The ibis expression to execute and persist to parquet. - path - The data source. A string or Path to the parquet directory. + directory + A string or Path to the parquet directory. params Mapping of scalar parameter expressions to value. 
limit @@ -1341,7 +1342,9 @@ def to_parquet_dir( Returns a Pyspark StreamingQuery object if in streaming mode, otherwise None """ self._run_pre_execute_hooks(expr) - return self._to_filesystem_output(expr, "parquet", path, params, limit, options) + return self._to_filesystem_output( + expr, "parquet", directory, params, limit, options + ) @util.experimental def to_csv_dir( diff --git a/ibis/backends/pyspark/tests/test_import_export.py b/ibis/backends/pyspark/tests/test_import_export.py index 94c47638afc2..43be9b3bbcd0 100644 --- a/ibis/backends/pyspark/tests/test_import_export.py +++ b/ibis/backends/pyspark/tests/test_import_export.py @@ -66,7 +66,7 @@ def test_to_parquet_dir(con_streaming, tmp_path): path = tmp_path / "out" con_streaming.to_parquet_dir( t.limit(5), - path=path, + directory=path, options={"checkpointLocation": tmp_path / "checkpoint"}, ) sleep(2) From 082f519231b8698d6b6bdd4208c88c8cc3f582cc Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Mon, 19 Aug 2024 16:52:06 -0400 Subject: [PATCH 23/23] chore(sigcheck): ignore `from_connection` --- ibis/backends/tests/test_signatures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis/backends/tests/test_signatures.py b/ibis/backends/tests/test_signatures.py index 7e3a3bd2882c..3d5880650114 100644 --- a/ibis/backends/tests/test_signatures.py +++ b/ibis/backends/tests/test_signatures.py @@ -14,7 +14,7 @@ from ibis.backends.sql import SQLBackend from ibis.backends.tests.signature.typecheck import compatible -SKIP_METHODS = ["do_connect"] +SKIP_METHODS = ["do_connect", "from_connection"] def _scrape_methods(modules):