From 6cd3eeeaa88323fd14a4388be9d42a64a22d2ea1 Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Thu, 25 Jul 2024 04:06:35 -0800 Subject: [PATCH] fix(druid): get basic timestamp functionality working (#9692) Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com> --- ci/schema/druid.sql | 19 +- ibis/backends/druid/tests/conftest.py | 23 +- ibis/backends/sql/compilers/druid.py | 11 + ibis/backends/tests/test_aggregation.py | 10 +- ibis/backends/tests/test_export.py | 22 +- ibis/backends/tests/test_generic.py | 1 - ibis/backends/tests/test_param.py | 8 +- ibis/backends/tests/test_temporal.py | 360 +++++------------------- 8 files changed, 119 insertions(+), 335 deletions(-) diff --git a/ci/schema/druid.sql b/ci/schema/druid.sql index 98a9701d8afc..a61af702275f 100644 --- a/ci/schema/druid.sql +++ b/ci/schema/druid.sql @@ -36,11 +36,24 @@ PARTITIONED BY ALL TIME; REPLACE INTO "functional_alltypes" OVERWRITE ALL -SELECT * +SELECT + "id", + "bool_col", + "tinyint_col", + "smallint_col", + "int_col", + "bigint_col", + "float_col", + "double_col", + "date_string_col", + "string_col", + TIME_PARSE(CONCAT(REPLACE("timestamp_col", ' ', 'T'), 'Z')) AS "timestamp_col", + "year", + "month" FROM TABLE( EXTERN( - '{"type":"local","files":["/data/functional_alltypes.parquet"]}', - '{"type":"parquet"}', + '{"type":"local","files":["/data/functional_alltypes.csv"]}', + '{"type":"csv","skipHeaderRows":1,"columns":["id","bool_col","tinyint_col","smallint_col","int_col","bigint_col","float_col","double_col","date_string_col","string_col","timestamp_col","year","month"]}', '[{"name":"id","type":"long"},{"name":"bool_col","type":"long"},{"name":"tinyint_col","type":"long"},{"name":"smallint_col","type":"long"},{"name":"int_col","type":"long"},{"name":"bigint_col","type":"long"},{"name":"float_col","type":"double"},{"name":"double_col","type":"double"},{"name":"date_string_col","type":"string"},{"name":"string_col","type":"string"},{"name":"timestamp_col","type":"string"},{"name":"year","type":"long"},{"name":"month","type":"long"}]' ) ) diff --git a/ibis/backends/druid/tests/conftest.py b/ibis/backends/druid/tests/conftest.py index 9dce305b66bc..306926632cb4 100644 --- a/ibis/backends/druid/tests/conftest.py +++ b/ibis/backends/druid/tests/conftest.py @@ -94,7 +94,7 @@ def run_query(session: Session, query: str) -> None: class TestConf(ServiceBackendTest): # druid has the same rounding behavior as postgres check_dtype = False - returned_timestamp_unit = "s" + returned_timestamp_unit = "ms" supports_arrays = False native_bool = True supports_structs = False @@ -106,13 +106,26 @@ class TestConf(ServiceBackendTest): @property def functional_alltypes(self) -> ir.Table: t = self.connection.table("functional_alltypes") - # The parquet loading for booleans appears to be broken in Druid, so - # I'm using this as a workaround to make the data match what's on disk. - return t.mutate(bool_col=1 - t.id % 2) + return t.mutate( + # The parquet loading for booleans appears to be broken in Druid, so + # I'm using this as a workaround to make the data match what's on disk. + bool_col=1 - t.id % 2, + # timestamp_col is loaded as a long because druid's type system is + # awful: it does 99% of the work of a proper timestamp type, but + # encodes it as an integer. I've never seen or heard of any other + # tool that calls itself a time series database or "good for + # working with time series", that lacks a first-class timestamp + # type. 
+ timestamp_col=t.timestamp_col.to_timestamp(unit="ms"), + ) @property def test_files(self) -> Iterable[Path]: - return self.data_dir.joinpath("parquet").glob("*.parquet") + return [ + path + for path in self.data_dir.joinpath("parquet").glob("*.parquet") + if path.name != "functional_alltypes.parquet" + ] + [self.data_dir.joinpath("csv", "functional_alltypes.csv")] def _load_data(self, **_: Any) -> None: """Load test data into a druid backend instance. diff --git a/ibis/backends/sql/compilers/druid.py b/ibis/backends/sql/compilers/druid.py index 56196875d9f1..fd7bed49dff1 100644 --- a/ibis/backends/sql/compilers/druid.py +++ b/ibis/backends/sql/compilers/druid.py @@ -4,11 +4,13 @@ import sqlglot.expressions as sge import toolz +import ibis.common.exceptions as exc import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis.backends.sql.compilers.base import NULL, AggGen, SQLGlotCompiler from ibis.backends.sql.datatypes import DruidType from ibis.backends.sql.dialects import Druid +from ibis.common.temporal import TimestampUnit class DruidCompiler(SQLGlotCompiler): @@ -36,7 +38,9 @@ class DruidCompiler(SQLGlotCompiler): ops.ArrayZip, ops.CountDistinctStar, ops.Covariance, + ops.Date, ops.DateDelta, + ops.DateFromYMD, ops.DayOfWeekIndex, ops.DayOfWeekName, ops.First, @@ -169,6 +173,13 @@ def visit_Cast(self, op, *, arg, to): return self.f.time_parse(arg) return super().visit_Cast(op, arg=arg, to=to) + def visit_TimestampFromUNIX(self, op, *, arg, unit): + if unit == TimestampUnit.SECOND: + return self.f.millis_to_timestamp(arg * 1_000) + elif unit == TimestampUnit.MILLISECOND: + return self.f.millis_to_timestamp(arg) + raise exc.UnsupportedArgumentError(f"Druid doesn't support {unit} units") + def visit_TimestampFromYMDHMS( self, op, *, year, month, day, hours, minutes, seconds ): diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index b95eaf676bfe..fcf2d24777bc 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -110,11 +110,6 @@ def mean_udf(s): lambda t: t.timestamp_col.max(), lambda t: t.timestamp_col.max(), id="timestamp_max", - marks=pytest.mark.broken( - ["druid"], - raises=PyDruidProgrammingError, - reason="Max aggregation is not supported for 'STRING' type SQL", - ), ), ] @@ -1174,12 +1169,13 @@ def test_string_quantile(alltypes, func): assert result == "a" -@pytest.mark.notimpl(["bigquery", "sqlite"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl( + ["bigquery", "sqlite", "druid"], raises=com.OperationNotDefinedError +) @pytest.mark.notyet( ["impala", "mysql", "mssql", "trino", "exasol", "flink"], raises=com.OperationNotDefinedError, ) -@pytest.mark.broken(["druid"], raises=AttributeError) @pytest.mark.notyet( ["snowflake"], raises=SnowflakeProgrammingError, diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index c75d64cf2cd3..9321b1f594e0 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -28,20 +28,11 @@ pa = pytest.importorskip("pyarrow") limit = [ - param( - 42, - id="limit", - # limit not implemented for pandas-family backends - marks=[pytest.mark.notimpl(["dask", "pandas"])], - ), + # limit not implemented for pandas-family backends + param(42, id="limit", marks=pytest.mark.notimpl(["dask", "pandas"])), ] -no_limit = [ - param( - None, - id="nolimit", - ) -] +no_limit = [param(None, id="nolimit")] limit_no_limit = limit + no_limit @@ -426,7 +417,9 @@ def 
test_roundtrip_delta(backend, con, alltypes, tmp_path): @pytest.mark.notimpl( - ["druid"], raises=AttributeError, reason="string type is used for timestamp_col" + ["druid"], + raises=PyDruidProgrammingError, + reason="Invalid SQL generated; druid doesn't know about TIMESTAMPTZ", ) def test_arrow_timestamp_with_time_zone(alltypes): from ibis.formats.pyarrow import PyArrowType @@ -512,9 +505,8 @@ def test_to_pandas_batches_column(backend, con, n): assert sum(map(len, t.to_pandas_batches())) == n -@pytest.mark.notimpl(["druid"]) def test_to_pandas_batches_scalar(backend, con): - t = backend.functional_alltypes.timestamp_col.max() + t = backend.functional_alltypes.int_col.max() expected = t.execute() result1 = list(con.to_pandas_batches(t)) diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index f63d237323fd..f983148325ac 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -1120,7 +1120,6 @@ def test_between(backend, alltypes, df): backend.assert_series_equal(result, expected) -@pytest.mark.notimpl(["druid"]) def test_interactive(alltypes, monkeypatch): monkeypatch.setattr(ibis.options, "interactive", True) diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index 78abe2e99f18..239e7524c4af 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -124,25 +124,19 @@ def test_scalar_param_map(con): marks=[pytest.mark.notimpl(["druid"])], ), param( - "2009-01-20 01:02:03", - "timestamp", - "timestamp_col", - id="string_timestamp", - marks=[pytest.mark.notimpl(["druid"])], + "2009-01-20 01:02:03", "timestamp", "timestamp_col", id="string_timestamp" ), param( datetime.date(2009, 1, 20), "timestamp", "timestamp_col", id="date_timestamp", - marks=[pytest.mark.notimpl(["druid"])], ), param( datetime.datetime(2009, 1, 20, 1, 2, 3), "timestamp", "timestamp_col", id="datetime_timestamp", - marks=[pytest.mark.notimpl(["druid"])], ), ], ) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 8fe67904040e..669562478439 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -45,12 +45,14 @@ @pytest.mark.parametrize("attr", ["year", "month", "day"]) @pytest.mark.parametrize( "expr_fn", - [param(lambda c: c.date(), id="date"), param(lambda c: c.cast("date"), id="cast")], -) -@pytest.mark.notimpl( - ["druid"], - raises=AttributeError, - reason="Can only use .dt accessor with datetimelike values", + [ + param( + methodcaller("date"), + marks=[pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError)], + id="date", + ), + param(methodcaller("cast", "date"), id="cast"), + ], ) def test_date_extract(backend, alltypes, df, attr, expr_fn): expr = getattr(expr_fn(alltypes.timestamp_col), attr)() @@ -85,11 +87,6 @@ def test_date_extract(backend, alltypes, df, attr, expr_fn): "second", ], ) -@pytest.mark.notimpl( - ["druid"], - raises=(AttributeError, com.OperationNotDefinedError), - reason="AttributeError: 'StringColumn' object has no attribute 'X'", -) def test_timestamp_extract(backend, alltypes, df, attr): method = getattr(alltypes.timestamp_col, attr) expr = method().name(attr) @@ -196,12 +193,7 @@ def test_timestamp_extract_literal(con, func, expected): assert con.execute(func(value).name("tmp")) == expected -@pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no 
attribute 'microsecond'", -) +@pytest.mark.notimpl(["oracle", "druid"], raises=com.OperationNotDefinedError) @pytest.mark.notyet( ["pyspark"], raises=com.UnsupportedOperationError, @@ -223,12 +215,7 @@ def test_timestamp_extract_microseconds(backend, alltypes, df): backend.assert_series_equal(result, expected) -@pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'millisecond'", -) +@pytest.mark.notimpl(["oracle", "druid"], raises=com.OperationNotDefinedError) @pytest.mark.broken(["sqlite"], raises=AssertionError) def test_timestamp_extract_milliseconds(backend, alltypes, df): expr = alltypes.timestamp_col.millisecond().name("millisecond") @@ -240,11 +227,7 @@ def test_timestamp_extract_milliseconds(backend, alltypes, df): @pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'epoch_seconds'", -) +@pytest.mark.broken(["druid"], raises=PyDruidProgrammingError) @pytest.mark.broken( ["bigquery"], raises=GoogleBadRequest, @@ -270,11 +253,6 @@ def test_timestamp_extract_epoch_seconds(backend, alltypes, df): @pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'week_of_year'", -) def test_timestamp_extract_week_of_year(backend, alltypes, df): expr = alltypes.timestamp_col.week_of_year().name("tmp") result = expr.execute() @@ -361,6 +339,7 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): ["clickhouse", "mysql", "sqlite", "datafusion", "exasol"], raises=com.UnsupportedOperationError, ), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -376,6 +355,7 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): raises=Py4JJavaError, reason="microseconds not supported in truncation", ), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -398,6 +378,7 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): "mssql", "datafusion", "exasol", + "druid", ], raises=com.UnsupportedOperationError, ), @@ -415,11 +396,6 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): ), ], ) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="AttributeError: 'StringColumn' object has no attribute 'truncate'", -) def test_timestamp_truncate(backend, alltypes, df, ibis_unit, pandas_unit): expr = alltypes.timestamp_col.truncate(ibis_unit).name("tmp") @@ -437,13 +413,12 @@ def test_timestamp_truncate(backend, alltypes, df, ibis_unit, pandas_unit): @pytest.mark.parametrize( - ("ibis_unit", "pandas_unit"), + "unit", [ - ("Y", "Y"), - ("M", "M"), - ("D", "D"), + "Y", + "M", + "D", param( - "W", "W", marks=[ pytest.mark.notyet(["mysql"], raises=com.UnsupportedOperationError), @@ -456,19 +431,11 @@ def test_timestamp_truncate(backend, alltypes, df, ibis_unit, pandas_unit): ), ], ) -@pytest.mark.broken(["druid"], reason="snaps to the UNIX epoch", raises=AssertionError) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="AttributeError: 'StringColumn' object has no attribute 'date'", -) -def test_date_truncate(backend, alltypes, df, ibis_unit, pandas_unit): - expr = alltypes.timestamp_col.date().truncate(ibis_unit).name("tmp") +@pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError) +def 
test_date_truncate(backend, alltypes, df, unit): + expr = alltypes.timestamp_col.date().truncate(unit).name("tmp") - if ibis_unit in ("Y", "M", "D", "W"): - expected = df.timestamp_col.dt.to_period(pandas_unit).dt.to_timestamp().dt.date - else: - expected = df.timestamp_col.dt.floor(pandas_unit).dt.date + expected = df.timestamp_col.dt.to_period(unit).dt.to_timestamp().dt.date result = expr.execute() expected = backend.default_series_rename(expected) @@ -601,15 +568,9 @@ def test_date_truncate(backend, alltypes, df, ibis_unit, pandas_unit): ], ) @pytest.mark.notimpl( - ["datafusion", "sqlite", "snowflake", "mssql", "oracle"], + ["datafusion", "sqlite", "snowflake", "mssql", "oracle", "druid", "exasol"], raises=com.OperationNotDefinedError, ) -@pytest.mark.notimpl( - ["druid"], - raises=ValidationError, - reason="Given argument with datatype interval('h') is not implicitly castable to string", -) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_integer_to_interval_timestamp( backend, con, alltypes, df, unit, displacement_type ): @@ -726,11 +687,7 @@ def convert_to_offset(x): pytest.mark.notimpl( ["sqlite", "exasol"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl( - ["druid"], - raises=ValidationError, - reason="Given argument with datatype interval('D') is not implicitly castable to string", - ), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -745,6 +702,7 @@ def convert_to_offset(x): raises=com.OperationNotDefinedError, ), pytest.mark.notimpl(["impala"], raises=com.UnsupportedOperationError), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), pytest.mark.notimpl(["mysql"], raises=sg.ParseError), pytest.mark.notimpl( ["druid"], @@ -766,11 +724,7 @@ def convert_to_offset(x): ), pytest.mark.notimpl(["impala"], raises=com.UnsupportedOperationError), pytest.mark.notimpl(["mysql"], raises=sg.ParseError), - pytest.mark.notimpl( - ["druid"], - raises=ValidationError, - reason="alltypes.timestamp_col is represented as string", - ), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -778,14 +732,10 @@ def convert_to_offset(x): lambda t, _: t.timestamp_col - pd.Timedelta(days=17), id="timestamp-subtract-interval", marks=[ - pytest.mark.notimpl( - ["druid"], - raises=TypeError, - reason="unsupported operand type(s) for -: 'StringColumn' and 'IntervalScalar'", - ), pytest.mark.notimpl( ["sqlite", "exasol"], raises=com.OperationNotDefinedError ), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -799,11 +749,8 @@ def convert_to_offset(x): id="date-add-interval", marks=[ pytest.mark.notimpl( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'date'", + ["exasol", "druid"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), param( @@ -817,11 +764,8 @@ def convert_to_offset(x): id="date-subtract-interval", marks=[ pytest.mark.notimpl( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'date'", + ["exasol", "druid"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), param( @@ -832,14 +776,10 @@ def convert_to_offset(x): id="timestamp-subtract-timestamp", marks=[ pytest.mark.notimpl( - ["bigquery", "snowflake", "sqlite"], + ["bigquery", "snowflake", "sqlite", "exasol"], raises=com.OperationNotDefinedError, ), - pytest.mark.notimpl( - ["druid"], - 
raises=ValidationError, - reason="unsupported operand type(s) for -: 'StringColumn' and 'TimestampScalar'", - ), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), pytest.mark.broken( ["duckdb"], raises=AssertionError, @@ -862,7 +802,6 @@ def convert_to_offset(x): raises=Exception, reason="pyarrow.lib.ArrowInvalid: Casting from duration[us] to duration[s] would lose data", ), - pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), param( @@ -879,16 +818,8 @@ def convert_to_offset(x): raises=AttributeError, reason="DayTimeIntervalType added in pyspark 3.3", ), - pytest.mark.notimpl(["bigquery"], raises=com.OperationNotDefinedError), pytest.mark.notimpl( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'date'", - ), - pytest.mark.notimpl( - ["flink"], - raises=com.OperationNotDefinedError, - reason="DATE_DIFF is not supported in Flink", + ["bigquery", "druid", "flink"], raises=com.OperationNotDefinedError ), pytest.mark.broken( ["datafusion"], @@ -929,11 +860,6 @@ def test_temporal_binop(backend, con, alltypes, df, expr_fn, expected_fn): plus, id="large-days-plus", marks=[ - pytest.mark.broken( - ["druid"], - raises=AssertionError, - reason="alltypes.timestamp_col is represented as string", - ), pytest.mark.broken( ["clickhouse"], raises=AssertionError, @@ -948,76 +874,16 @@ def test_temporal_binop(backend, con, alltypes, df, expr_fn, expected_fn): ), ], ), - param( - "5W", - plus, - id="weeks-plus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=AssertionError, - reason="alltypes.timestamp_col is represented as string", - ), - ], - ), - param( - "3d", - plus, - id="three-days-plus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=AssertionError, - reason="alltypes.timestamp_col is represented as string", - ), - ], - ), - param( - "2h", - plus, - id="two-hours-plus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=AssertionError, - reason="alltypes.timestamp_col is represented as string", - ), - ], - ), - param( - "3m", - plus, - id="three-minutes-plus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=AssertionError, - reason="alltypes.timestamp_col is represented as string", - ), - ], - ), - param( - "10s", - plus, - id="ten-seconds-plus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=AssertionError, - reason="alltypes.timestamp_col is represented as string", - ), - ], - ), + param("5W", plus, id="weeks-plus"), + param("3d", plus, id="three-days-plus"), + param("2h", plus, id="two-hours-plus"), + param("3m", plus, id="three-minutes-plus"), + param("10s", plus, id="ten-seconds-plus"), param( "36500d", minus, id="large-days-minus", marks=[ - pytest.mark.broken( - ["druid"], - raises=TypeError, - reason="unsupported operand type(s) for -: 'StringColumn' and 'Timedelta'", - ), pytest.mark.broken( ["clickhouse"], raises=AssertionError, @@ -1032,68 +898,14 @@ def test_temporal_binop(backend, con, alltypes, df, expr_fn, expected_fn): ), ], ), - param( - "5W", - minus, - id="weeks-minus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=TypeError, - reason="unsupported operand type(s) for -: 'StringColumn' and 'Timedelta'", - ), - ], - ), - param( - "3d", - minus, - id="three-days-minus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=TypeError, - reason="unsupported operand type(s) for -: 'StringColumn' and 'Timedelta'", - ), - ], - ), - param( - "2h", - minus, - id="two-hours-minus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=TypeError, - reason="unsupported operand 
type(s) for -: 'StringColumn' and 'Timedelta'", - ), - ], - ), - param( - "3m", - minus, - id="three-minutes-minus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=TypeError, - reason="unsupported operand type(s) for -: 'StringColumn' and 'Timedelta'", - ), - ], - ), - param( - "10s", - minus, - id="ten-seconds-minus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=TypeError, - reason="unsupported operand type(s) for -: 'StringColumn' and 'Timedelta'", - ), - ], - ), + param("5W", minus, id="weeks-minus"), + param("3d", minus, id="three-days-minus"), + param("2h", minus, id="two-hours-minus"), + param("3m", minus, id="three-minutes-minus"), + param("10s", minus, id="ten-seconds-minus"), ], ) +@pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError) @pytest.mark.notimpl(["sqlite", "mssql"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_temporal_binop_pandas_timedelta( @@ -1116,8 +928,8 @@ def test_temporal_binop_pandas_timedelta( ) @pytest.mark.notimpl( ["druid"], - raises=AttributeError, - reason="Can only use .dt accessor with datetimelike values", + raises=PyDruidProgrammingError, + reason="Invalid SQL; druid doesn't know about TIMESTAMPTZ", ) def test_timestamp_comparison_filter(backend, con, alltypes, df, func_name): ts = pd.Timestamp("20100302", tz="UTC").to_pydatetime() @@ -1165,11 +977,7 @@ def test_timestamp_comparison_filter(backend, con, alltypes, df, func_name): "ne", ], ) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="Can only use .dt accessor with datetimelike values", -) +@pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError) @pytest.mark.notimpl( ["polars"], raises=BaseException, # pyo3_runtime.PanicException is not a subclass of Exception @@ -1199,14 +1007,9 @@ def test_timestamp_comparison_filter_numpy(backend, con, alltypes, df, func_name @pytest.mark.notimpl( - ["snowflake", "mssql", "exasol"], + ["snowflake", "mssql", "exasol", "druid"], raises=com.OperationNotDefinedError, ) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'date'", -) def test_interval_add_cast_scalar(backend, alltypes): timestamp_date = alltypes.timestamp_col.date() delta = ibis.literal(10).cast("interval('D')") @@ -1217,14 +1020,9 @@ def test_interval_add_cast_scalar(backend, alltypes): @pytest.mark.notimpl( - ["snowflake", "mssql", "exasol"], + ["snowflake", "mssql", "exasol", "druid"], raises=com.OperationNotDefinedError, ) -@pytest.mark.notimpl( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'date'", -) @pytest.mark.broken(["flink"], raises=AssertionError, reason="incorrect results") def test_interval_add_cast_column(backend, alltypes, df): timestamp_date = alltypes.timestamp_col.date() @@ -1276,13 +1074,9 @@ def test_interval_add_cast_column(backend, alltypes, df): ), ], ) -@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'strftime'", +@pytest.mark.notimpl( + ["datafusion", "druid", "exasol"], raises=com.OperationNotDefinedError ) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_strftime(backend, alltypes, df, expr_fn, pandas_pattern): expr = expr_fn(alltypes) expected = df.timestamp_col.dt.strftime(pandas_pattern).rename("formatted") @@ -1321,6 +1115,7 @@ def test_strftime(backend, alltypes, df, expr_fn, 
pandas_pattern):
                 raises=com.UnsupportedArgumentError,
                 reason="PySpark backend does not support timestamp from unix time with unit us. Supported unit is s.",
             ),
+            pytest.mark.notimpl(["druid"], raises=com.UnsupportedArgumentError),
             pytest.mark.notimpl(
                 ["duckdb", "mssql", "clickhouse"],
                 raises=com.UnsupportedOperationError,
@@ -1341,10 +1136,11 @@ def test_strftime(backend, alltypes, df, expr_fn, pandas_pattern):
                 raises=com.UnsupportedArgumentError,
                 reason="PySpark backend does not support timestamp from unix time with unit ms. Supported unit is s.",
             ),
+            pytest.mark.notimpl(["druid"], raises=com.UnsupportedArgumentError),
             pytest.mark.notimpl(
                 ["duckdb", "mssql", "clickhouse"],
                 raises=com.UnsupportedOperationError,
                 reason="`ms` unit is not supported!",
             ),
             pytest.mark.notimpl(
                 ["flink"],
@@ -1356,7 +1152,7 @@ def test_strftime(backend, alltypes, df, expr_fn, pandas_pattern):
     ],
 )
 @pytest.mark.notimpl(
-    ["mysql", "postgres", "risingwave", "sqlite", "druid", "oracle"],
+    ["mysql", "postgres", "risingwave", "sqlite", "oracle"],
     raises=com.OperationNotDefinedError,
 )
 @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError)
@@ -1561,13 +1357,7 @@ def test_day_of_week_scalar(con, date, expected_index, expected_day):
     assert result_day.lower() == expected_day.lower()


-@pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError)
-@pytest.mark.broken(
-    ["druid"],
-    raises=AttributeError,
-    reason="StringColumn' object has no attribute 'day_of_week'",
-)
-@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError)
+@pytest.mark.notimpl(["oracle", "exasol", "druid"], raises=com.OperationNotDefinedError)
 @pytest.mark.broken(
     ["risingwave"],
     raises=AssertionError,
@@ -1612,12 +1402,7 @@ def test_day_of_week_column(backend, alltypes, df):
         ),
     ],
 )
-@pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError)
-@pytest.mark.notimpl(
-    ["druid"],
-    raises=AttributeError,
-    reason="'StringColumn' object has no attribute 'day_of_week'",
-)
+@pytest.mark.notimpl(["oracle", "druid"], raises=com.OperationNotDefinedError)
 def test_day_of_week_column_group_by(
     backend, alltypes, df, day_of_week_expr, day_of_week_pandas
 ):
@@ -1686,10 +1471,8 @@ def test_today_from_projection(alltypes):


 @pytest.mark.notimpl(
-    ["pandas", "dask", "exasol", "risingwave"], raises=com.OperationNotDefinedError
-)
-@pytest.mark.notimpl(
-    ["druid"], raises=PyDruidProgrammingError, reason="SQL parse failed"
+    ["pandas", "dask", "exasol", "risingwave", "druid"],
+    raises=com.OperationNotDefinedError,
 )
 @pytest.mark.notimpl(
     ["oracle"], raises=OracleDatabaseError, reason="ORA-00936 missing expression"
@@ -1923,12 +1706,8 @@ def test_interval_literal(con, backend):


 @pytest.mark.notimpl(
-    ["pandas", "dask", "exasol", "risingwave"], raises=com.OperationNotDefinedError
-)
-@pytest.mark.broken(
-    ["druid"],
-    raises=AttributeError,
-    reason="'StringColumn' object has no attribute 'year'",
+    ["pandas", "dask", "exasol", "risingwave", "druid"],
+    raises=com.OperationNotDefinedError,
 )
 @pytest.mark.broken(
     ["oracle"], raises=OracleDatabaseError, reason="ORA-00936: missing expression"
@@ -1947,11 +1726,6 @@ def test_date_column_from_ymd(backend, con, alltypes, df):
     ["pandas", "dask", "pyspark", "mysql", "exasol"],
     raises=com.OperationNotDefinedError,
 )
-@pytest.mark.broken(
-    ["druid"],
-    raises=AttributeError,
-    reason="StringColumn' object has no attribute 'year'",
-)
 @pytest.mark.notyet(["impala", "oracle"], raises=com.OperationNotDefinedError)
 def 
test_timestamp_column_from_ymdhms(backend, con, alltypes, df): c = alltypes.timestamp_col @@ -2285,16 +2059,12 @@ def test_delta(con, start, end, unit, expected): "pyspark", "sqlite", "trino", + "druid", ], raises=com.OperationNotDefinedError, ) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="Druid tests load timestamp_col as a string currently", -) @pytest.mark.parametrize( - "kws, pd_freq", + ("kws", "pd_freq"), [ param( {"milliseconds": 50}, @@ -2403,14 +2173,10 @@ def test_timestamp_bucket(backend, kws, pd_freq): "pyspark", "sqlite", "trino", + "druid", ], raises=com.OperationNotDefinedError, ) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="Druid tests load timestamp_col as a string currently", -) @pytest.mark.notimpl( ["clickhouse", "mssql", "snowflake"], reason="offset arg not supported",