From 6cd3eeeaa88323fd14a4388be9d42a64a22d2ea1 Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Thu, 25 Jul 2024 04:06:35 -0800 Subject: [PATCH] fix(druid): get basic timestamp functionality working (#9692) Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com> --- ci/schema/druid.sql | 19 +- ibis/backends/druid/tests/conftest.py | 23 +- ibis/backends/sql/compilers/druid.py | 11 + ibis/backends/tests/test_aggregation.py | 10 +- ibis/backends/tests/test_export.py | 22 +- ibis/backends/tests/test_generic.py | 1 - ibis/backends/tests/test_param.py | 8 +- ibis/backends/tests/test_temporal.py | 360 +++++------------------- 8 files changed, 119 insertions(+), 335 deletions(-) diff --git a/ci/schema/druid.sql b/ci/schema/druid.sql index 98a9701d8afc..a61af702275f 100644 --- a/ci/schema/druid.sql +++ b/ci/schema/druid.sql @@ -36,11 +36,24 @@ PARTITIONED BY ALL TIME; REPLACE INTO "functional_alltypes" OVERWRITE ALL -SELECT * +SELECT + "id", + "bool_col", + "tinyint_col", + "smallint_col", + "int_col", + "bigint_col", + "float_col", + "double_col", + "date_string_col", + "string_col", + TIME_PARSE(CONCAT(REPLACE("timestamp_col", ' ', 'T'), 'Z')) AS "timestamp_col", + "year", + "month" FROM TABLE( EXTERN( - '{"type":"local","files":["/data/functional_alltypes.parquet"]}', - '{"type":"parquet"}', + '{"type":"local","files":["/data/functional_alltypes.csv"]}', + '{"type":"csv","skipHeaderRows":1,"columns":["id","bool_col","tinyint_col","smallint_col","int_col","bigint_col","float_col","double_col","date_string_col","string_col","timestamp_col","year","month"]}', '[{"name":"id","type":"long"},{"name":"bool_col","type":"long"},{"name":"tinyint_col","type":"long"},{"name":"smallint_col","type":"long"},{"name":"int_col","type":"long"},{"name":"bigint_col","type":"long"},{"name":"float_col","type":"double"},{"name":"double_col","type":"double"},{"name":"date_string_col","type":"string"},{"name":"string_col","type":"string"},{"name":"timestamp_col","type":"string"},{"name":"year","type":"long"},{"name":"month","type":"long"}]' ) ) diff --git a/ibis/backends/druid/tests/conftest.py b/ibis/backends/druid/tests/conftest.py index 9dce305b66bc..306926632cb4 100644 --- a/ibis/backends/druid/tests/conftest.py +++ b/ibis/backends/druid/tests/conftest.py @@ -94,7 +94,7 @@ def run_query(session: Session, query: str) -> None: class TestConf(ServiceBackendTest): # druid has the same rounding behavior as postgres check_dtype = False - returned_timestamp_unit = "s" + returned_timestamp_unit = "ms" supports_arrays = False native_bool = True supports_structs = False @@ -106,13 +106,26 @@ class TestConf(ServiceBackendTest): @property def functional_alltypes(self) -> ir.Table: t = self.connection.table("functional_alltypes") - # The parquet loading for booleans appears to be broken in Druid, so - # I'm using this as a workaround to make the data match what's on disk. - return t.mutate(bool_col=1 - t.id % 2) + return t.mutate( + # The parquet loading for booleans appears to be broken in Druid, so + # I'm using this as a workaround to make the data match what's on disk. + bool_col=1 - t.id % 2, + # timestamp_col is loaded as a long because druid's type system is + # awful: it does 99% of the work of a proper timestamp type, but + # encodes it as an integer. I've never seen or heard of any other + # tool that calls itself a time series database or "good for + # working with time series", that lacks a first-class timestamp + # type. 
+ timestamp_col=t.timestamp_col.to_timestamp(unit="ms"), + ) @property def test_files(self) -> Iterable[Path]: - return self.data_dir.joinpath("parquet").glob("*.parquet") + return [ + path + for path in self.data_dir.joinpath("parquet").glob("*.parquet") + if path.name != "functional_alltypes.parquet" + ] + [self.data_dir.joinpath("csv", "functional_alltypes.csv")] def _load_data(self, **_: Any) -> None: """Load test data into a druid backend instance. diff --git a/ibis/backends/sql/compilers/druid.py b/ibis/backends/sql/compilers/druid.py index 56196875d9f1..fd7bed49dff1 100644 --- a/ibis/backends/sql/compilers/druid.py +++ b/ibis/backends/sql/compilers/druid.py @@ -4,11 +4,13 @@ import sqlglot.expressions as sge import toolz +import ibis.common.exceptions as exc import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis.backends.sql.compilers.base import NULL, AggGen, SQLGlotCompiler from ibis.backends.sql.datatypes import DruidType from ibis.backends.sql.dialects import Druid +from ibis.common.temporal import TimestampUnit class DruidCompiler(SQLGlotCompiler): @@ -36,7 +38,9 @@ class DruidCompiler(SQLGlotCompiler): ops.ArrayZip, ops.CountDistinctStar, ops.Covariance, + ops.Date, ops.DateDelta, + ops.DateFromYMD, ops.DayOfWeekIndex, ops.DayOfWeekName, ops.First, @@ -169,6 +173,13 @@ def visit_Cast(self, op, *, arg, to): return self.f.time_parse(arg) return super().visit_Cast(op, arg=arg, to=to) + def visit_TimestampFromUNIX(self, op, *, arg, unit): + if unit == TimestampUnit.SECOND: + return self.f.millis_to_timestamp(arg * 1_000) + elif unit == TimestampUnit.MILLISECOND: + return self.f.millis_to_timestamp(arg) + raise exc.UnsupportedArgumentError(f"Druid doesn't support {unit} units") + def visit_TimestampFromYMDHMS( self, op, *, year, month, day, hours, minutes, seconds ): diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index b95eaf676bfe..fcf2d24777bc 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -110,11 +110,6 @@ def mean_udf(s): lambda t: t.timestamp_col.max(), lambda t: t.timestamp_col.max(), id="timestamp_max", - marks=pytest.mark.broken( - ["druid"], - raises=PyDruidProgrammingError, - reason="Max aggregation is not supported for 'STRING' type SQL", - ), ), ] @@ -1174,12 +1169,13 @@ def test_string_quantile(alltypes, func): assert result == "a" -@pytest.mark.notimpl(["bigquery", "sqlite"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl( + ["bigquery", "sqlite", "druid"], raises=com.OperationNotDefinedError +) @pytest.mark.notyet( ["impala", "mysql", "mssql", "trino", "exasol", "flink"], raises=com.OperationNotDefinedError, ) -@pytest.mark.broken(["druid"], raises=AttributeError) @pytest.mark.notyet( ["snowflake"], raises=SnowflakeProgrammingError, diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index c75d64cf2cd3..9321b1f594e0 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -28,20 +28,11 @@ pa = pytest.importorskip("pyarrow") limit = [ - param( - 42, - id="limit", - # limit not implemented for pandas-family backends - marks=[pytest.mark.notimpl(["dask", "pandas"])], - ), + # limit not implemented for pandas-family backends + param(42, id="limit", marks=pytest.mark.notimpl(["dask", "pandas"])), ] -no_limit = [ - param( - None, - id="nolimit", - ) -] +no_limit = [param(None, id="nolimit")] limit_no_limit = limit + no_limit @@ -426,7 +417,9 @@ def 
test_roundtrip_delta(backend, con, alltypes, tmp_path): @pytest.mark.notimpl( - ["druid"], raises=AttributeError, reason="string type is used for timestamp_col" + ["druid"], + raises=PyDruidProgrammingError, + reason="Invalid SQL generated; druid doesn't know about TIMESTAMPTZ", ) def test_arrow_timestamp_with_time_zone(alltypes): from ibis.formats.pyarrow import PyArrowType @@ -512,9 +505,8 @@ def test_to_pandas_batches_column(backend, con, n): assert sum(map(len, t.to_pandas_batches())) == n -@pytest.mark.notimpl(["druid"]) def test_to_pandas_batches_scalar(backend, con): - t = backend.functional_alltypes.timestamp_col.max() + t = backend.functional_alltypes.int_col.max() expected = t.execute() result1 = list(con.to_pandas_batches(t)) diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index f63d237323fd..f983148325ac 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -1120,7 +1120,6 @@ def test_between(backend, alltypes, df): backend.assert_series_equal(result, expected) -@pytest.mark.notimpl(["druid"]) def test_interactive(alltypes, monkeypatch): monkeypatch.setattr(ibis.options, "interactive", True) diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index 78abe2e99f18..239e7524c4af 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -124,25 +124,19 @@ def test_scalar_param_map(con): marks=[pytest.mark.notimpl(["druid"])], ), param( - "2009-01-20 01:02:03", - "timestamp", - "timestamp_col", - id="string_timestamp", - marks=[pytest.mark.notimpl(["druid"])], + "2009-01-20 01:02:03", "timestamp", "timestamp_col", id="string_timestamp" ), param( datetime.date(2009, 1, 20), "timestamp", "timestamp_col", id="date_timestamp", - marks=[pytest.mark.notimpl(["druid"])], ), param( datetime.datetime(2009, 1, 20, 1, 2, 3), "timestamp", "timestamp_col", id="datetime_timestamp", - marks=[pytest.mark.notimpl(["druid"])], ), ], ) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 8fe67904040e..669562478439 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -45,12 +45,14 @@ @pytest.mark.parametrize("attr", ["year", "month", "day"]) @pytest.mark.parametrize( "expr_fn", - [param(lambda c: c.date(), id="date"), param(lambda c: c.cast("date"), id="cast")], -) -@pytest.mark.notimpl( - ["druid"], - raises=AttributeError, - reason="Can only use .dt accessor with datetimelike values", + [ + param( + methodcaller("date"), + marks=[pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError)], + id="date", + ), + param(methodcaller("cast", "date"), id="cast"), + ], ) def test_date_extract(backend, alltypes, df, attr, expr_fn): expr = getattr(expr_fn(alltypes.timestamp_col), attr)() @@ -85,11 +87,6 @@ def test_date_extract(backend, alltypes, df, attr, expr_fn): "second", ], ) -@pytest.mark.notimpl( - ["druid"], - raises=(AttributeError, com.OperationNotDefinedError), - reason="AttributeError: 'StringColumn' object has no attribute 'X'", -) def test_timestamp_extract(backend, alltypes, df, attr): method = getattr(alltypes.timestamp_col, attr) expr = method().name(attr) @@ -196,12 +193,7 @@ def test_timestamp_extract_literal(con, func, expected): assert con.execute(func(value).name("tmp")) == expected -@pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no 
attribute 'microsecond'", -) +@pytest.mark.notimpl(["oracle", "druid"], raises=com.OperationNotDefinedError) @pytest.mark.notyet( ["pyspark"], raises=com.UnsupportedOperationError, @@ -223,12 +215,7 @@ def test_timestamp_extract_microseconds(backend, alltypes, df): backend.assert_series_equal(result, expected) -@pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'millisecond'", -) +@pytest.mark.notimpl(["oracle", "druid"], raises=com.OperationNotDefinedError) @pytest.mark.broken(["sqlite"], raises=AssertionError) def test_timestamp_extract_milliseconds(backend, alltypes, df): expr = alltypes.timestamp_col.millisecond().name("millisecond") @@ -240,11 +227,7 @@ def test_timestamp_extract_milliseconds(backend, alltypes, df): @pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'epoch_seconds'", -) +@pytest.mark.broken(["druid"], raises=PyDruidProgrammingError) @pytest.mark.broken( ["bigquery"], raises=GoogleBadRequest, @@ -270,11 +253,6 @@ def test_timestamp_extract_epoch_seconds(backend, alltypes, df): @pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'week_of_year'", -) def test_timestamp_extract_week_of_year(backend, alltypes, df): expr = alltypes.timestamp_col.week_of_year().name("tmp") result = expr.execute() @@ -361,6 +339,7 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): ["clickhouse", "mysql", "sqlite", "datafusion", "exasol"], raises=com.UnsupportedOperationError, ), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -376,6 +355,7 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): raises=Py4JJavaError, reason="microseconds not supported in truncation", ), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -398,6 +378,7 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): "mssql", "datafusion", "exasol", + "druid", ], raises=com.UnsupportedOperationError, ), @@ -415,11 +396,6 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): ), ], ) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="AttributeError: 'StringColumn' object has no attribute 'truncate'", -) def test_timestamp_truncate(backend, alltypes, df, ibis_unit, pandas_unit): expr = alltypes.timestamp_col.truncate(ibis_unit).name("tmp") @@ -437,13 +413,12 @@ def test_timestamp_truncate(backend, alltypes, df, ibis_unit, pandas_unit): @pytest.mark.parametrize( - ("ibis_unit", "pandas_unit"), + "unit", [ - ("Y", "Y"), - ("M", "M"), - ("D", "D"), + "Y", + "M", + "D", param( - "W", "W", marks=[ pytest.mark.notyet(["mysql"], raises=com.UnsupportedOperationError), @@ -456,19 +431,11 @@ def test_timestamp_truncate(backend, alltypes, df, ibis_unit, pandas_unit): ), ], ) -@pytest.mark.broken(["druid"], reason="snaps to the UNIX epoch", raises=AssertionError) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="AttributeError: 'StringColumn' object has no attribute 'date'", -) -def test_date_truncate(backend, alltypes, df, ibis_unit, pandas_unit): - expr = alltypes.timestamp_col.date().truncate(ibis_unit).name("tmp") +@pytest.mark.notimpl(["druid"], raises=com.OperationNotDefinedError) +def 
test_date_truncate(backend, alltypes, df, unit): + expr = alltypes.timestamp_col.date().truncate(unit).name("tmp") - if ibis_unit in ("Y", "M", "D", "W"): - expected = df.timestamp_col.dt.to_period(pandas_unit).dt.to_timestamp().dt.date - else: - expected = df.timestamp_col.dt.floor(pandas_unit).dt.date + expected = df.timestamp_col.dt.to_period(unit).dt.to_timestamp().dt.date result = expr.execute() expected = backend.default_series_rename(expected) @@ -601,15 +568,9 @@ def test_date_truncate(backend, alltypes, df, ibis_unit, pandas_unit): ], ) @pytest.mark.notimpl( - ["datafusion", "sqlite", "snowflake", "mssql", "oracle"], + ["datafusion", "sqlite", "snowflake", "mssql", "oracle", "druid", "exasol"], raises=com.OperationNotDefinedError, ) -@pytest.mark.notimpl( - ["druid"], - raises=ValidationError, - reason="Given argument with datatype interval('h') is not implicitly castable to string", -) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_integer_to_interval_timestamp( backend, con, alltypes, df, unit, displacement_type ): @@ -726,11 +687,7 @@ def convert_to_offset(x): pytest.mark.notimpl( ["sqlite", "exasol"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl( - ["druid"], - raises=ValidationError, - reason="Given argument with datatype interval('D') is not implicitly castable to string", - ), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -745,6 +702,7 @@ def convert_to_offset(x): raises=com.OperationNotDefinedError, ), pytest.mark.notimpl(["impala"], raises=com.UnsupportedOperationError), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), pytest.mark.notimpl(["mysql"], raises=sg.ParseError), pytest.mark.notimpl( ["druid"], @@ -766,11 +724,7 @@ def convert_to_offset(x): ), pytest.mark.notimpl(["impala"], raises=com.UnsupportedOperationError), pytest.mark.notimpl(["mysql"], raises=sg.ParseError), - pytest.mark.notimpl( - ["druid"], - raises=ValidationError, - reason="alltypes.timestamp_col is represented as string", - ), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -778,14 +732,10 @@ def convert_to_offset(x): lambda t, _: t.timestamp_col - pd.Timedelta(days=17), id="timestamp-subtract-interval", marks=[ - pytest.mark.notimpl( - ["druid"], - raises=TypeError, - reason="unsupported operand type(s) for -: 'StringColumn' and 'IntervalScalar'", - ), pytest.mark.notimpl( ["sqlite", "exasol"], raises=com.OperationNotDefinedError ), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), ], ), param( @@ -799,11 +749,8 @@ def convert_to_offset(x): id="date-add-interval", marks=[ pytest.mark.notimpl( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'date'", + ["exasol", "druid"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), param( @@ -817,11 +764,8 @@ def convert_to_offset(x): id="date-subtract-interval", marks=[ pytest.mark.notimpl( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'date'", + ["exasol", "druid"], raises=com.OperationNotDefinedError ), - pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), param( @@ -832,14 +776,10 @@ def convert_to_offset(x): id="timestamp-subtract-timestamp", marks=[ pytest.mark.notimpl( - ["bigquery", "snowflake", "sqlite"], + ["bigquery", "snowflake", "sqlite", "exasol"], raises=com.OperationNotDefinedError, ), - pytest.mark.notimpl( - ["druid"], - 
raises=ValidationError, - reason="unsupported operand type(s) for -: 'StringColumn' and 'TimestampScalar'", - ), + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), pytest.mark.broken( ["duckdb"], raises=AssertionError, @@ -862,7 +802,6 @@ def convert_to_offset(x): raises=Exception, reason="pyarrow.lib.ArrowInvalid: Casting from duration[us] to duration[s] would lose data", ), - pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), ], ), param( @@ -879,16 +818,8 @@ def convert_to_offset(x): raises=AttributeError, reason="DayTimeIntervalType added in pyspark 3.3", ), - pytest.mark.notimpl(["bigquery"], raises=com.OperationNotDefinedError), pytest.mark.notimpl( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'date'", - ), - pytest.mark.notimpl( - ["flink"], - raises=com.OperationNotDefinedError, - reason="DATE_DIFF is not supported in Flink", + ["bigquery", "druid", "flink"], raises=com.OperationNotDefinedError ), pytest.mark.broken( ["datafusion"], @@ -929,11 +860,6 @@ def test_temporal_binop(backend, con, alltypes, df, expr_fn, expected_fn): plus, id="large-days-plus", marks=[ - pytest.mark.broken( - ["druid"], - raises=AssertionError, - reason="alltypes.timestamp_col is represented as string", - ), pytest.mark.broken( ["clickhouse"], raises=AssertionError, @@ -948,76 +874,16 @@ def test_temporal_binop(backend, con, alltypes, df, expr_fn, expected_fn): ), ], ), - param( - "5W", - plus, - id="weeks-plus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=AssertionError, - reason="alltypes.timestamp_col is represented as string", - ), - ], - ), - param( - "3d", - plus, - id="three-days-plus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=AssertionError, - reason="alltypes.timestamp_col is represented as string", - ), - ], - ), - param( - "2h", - plus, - id="two-hours-plus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=AssertionError, - reason="alltypes.timestamp_col is represented as string", - ), - ], - ), - param( - "3m", - plus, - id="three-minutes-plus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=AssertionError, - reason="alltypes.timestamp_col is represented as string", - ), - ], - ), - param( - "10s", - plus, - id="ten-seconds-plus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=AssertionError, - reason="alltypes.timestamp_col is represented as string", - ), - ], - ), + param("5W", plus, id="weeks-plus"), + param("3d", plus, id="three-days-plus"), + param("2h", plus, id="two-hours-plus"), + param("3m", plus, id="three-minutes-plus"), + param("10s", plus, id="ten-seconds-plus"), param( "36500d", minus, id="large-days-minus", marks=[ - pytest.mark.broken( - ["druid"], - raises=TypeError, - reason="unsupported operand type(s) for -: 'StringColumn' and 'Timedelta'", - ), pytest.mark.broken( ["clickhouse"], raises=AssertionError, @@ -1032,68 +898,14 @@ def test_temporal_binop(backend, con, alltypes, df, expr_fn, expected_fn): ), ], ), - param( - "5W", - minus, - id="weeks-minus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=TypeError, - reason="unsupported operand type(s) for -: 'StringColumn' and 'Timedelta'", - ), - ], - ), - param( - "3d", - minus, - id="three-days-minus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=TypeError, - reason="unsupported operand type(s) for -: 'StringColumn' and 'Timedelta'", - ), - ], - ), - param( - "2h", - minus, - id="two-hours-minus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=TypeError, - reason="unsupported operand 
type(s) for -: 'StringColumn' and 'Timedelta'", - ), - ], - ), - param( - "3m", - minus, - id="three-minutes-minus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=TypeError, - reason="unsupported operand type(s) for -: 'StringColumn' and 'Timedelta'", - ), - ], - ), - param( - "10s", - minus, - id="ten-seconds-minus", - marks=[ - pytest.mark.broken( - ["druid"], - raises=TypeError, - reason="unsupported operand type(s) for -: 'StringColumn' and 'Timedelta'", - ), - ], - ), + param("5W", minus, id="weeks-minus"), + param("3d", minus, id="three-days-minus"), + param("2h", minus, id="two-hours-minus"), + param("3m", minus, id="three-minutes-minus"), + param("10s", minus, id="ten-seconds-minus"), ], ) +@pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError) @pytest.mark.notimpl(["sqlite", "mssql"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_temporal_binop_pandas_timedelta( @@ -1116,8 +928,8 @@ def test_temporal_binop_pandas_timedelta( ) @pytest.mark.notimpl( ["druid"], - raises=AttributeError, - reason="Can only use .dt accessor with datetimelike values", + raises=PyDruidProgrammingError, + reason="Invalid SQL; druid doesn't know about TIMESTAMPTZ", ) def test_timestamp_comparison_filter(backend, con, alltypes, df, func_name): ts = pd.Timestamp("20100302", tz="UTC").to_pydatetime() @@ -1165,11 +977,7 @@ def test_timestamp_comparison_filter(backend, con, alltypes, df, func_name): "ne", ], ) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="Can only use .dt accessor with datetimelike values", -) +@pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError) @pytest.mark.notimpl( ["polars"], raises=BaseException, # pyo3_runtime.PanicException is not a subclass of Exception @@ -1199,14 +1007,9 @@ def test_timestamp_comparison_filter_numpy(backend, con, alltypes, df, func_name @pytest.mark.notimpl( - ["snowflake", "mssql", "exasol"], + ["snowflake", "mssql", "exasol", "druid"], raises=com.OperationNotDefinedError, ) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'date'", -) def test_interval_add_cast_scalar(backend, alltypes): timestamp_date = alltypes.timestamp_col.date() delta = ibis.literal(10).cast("interval('D')") @@ -1217,14 +1020,9 @@ def test_interval_add_cast_scalar(backend, alltypes): @pytest.mark.notimpl( - ["snowflake", "mssql", "exasol"], + ["snowflake", "mssql", "exasol", "druid"], raises=com.OperationNotDefinedError, ) -@pytest.mark.notimpl( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'date'", -) @pytest.mark.broken(["flink"], raises=AssertionError, reason="incorrect results") def test_interval_add_cast_column(backend, alltypes, df): timestamp_date = alltypes.timestamp_col.date() @@ -1276,13 +1074,9 @@ def test_interval_add_cast_column(backend, alltypes, df): ), ], ) -@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="'StringColumn' object has no attribute 'strftime'", +@pytest.mark.notimpl( + ["datafusion", "druid", "exasol"], raises=com.OperationNotDefinedError ) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_strftime(backend, alltypes, df, expr_fn, pandas_pattern): expr = expr_fn(alltypes) expected = df.timestamp_col.dt.strftime(pandas_pattern).rename("formatted") @@ -1321,6 +1115,7 @@ def test_strftime(backend, alltypes, df, expr_fn, 
pandas_pattern):
                 raises=com.UnsupportedArgumentError,
                 reason="PySpark backend does not support timestamp from unix time with unit us. Supported unit is s.",
             ),
+            pytest.mark.notimpl(["druid"], raises=com.UnsupportedArgumentError),
             pytest.mark.notimpl(
                 ["duckdb", "mssql", "clickhouse"],
                 raises=com.UnsupportedOperationError,
@@ -1341,10 +1136,11 @@ def test_strftime(backend, alltypes, df, expr_fn, pandas_pattern):
                 raises=com.UnsupportedArgumentError,
                 reason="PySpark backend does not support timestamp from unix time with unit ms. Supported unit is s.",
             ),
+            pytest.mark.notimpl(["druid"], raises=com.UnsupportedArgumentError),
             pytest.mark.notimpl(
                 ["duckdb", "mssql", "clickhouse"],
                 raises=com.UnsupportedOperationError,
                 reason="`ms` unit is not supported!",
             ),
             pytest.mark.notimpl(
                 ["flink"],
@@ -1356,7 +1152,7 @@ def test_strftime(backend, alltypes, df, expr_fn, pandas_pattern):
     ],
 )
 @pytest.mark.notimpl(
-    ["mysql", "postgres", "risingwave", "sqlite", "druid", "oracle"],
+    ["mysql", "postgres", "risingwave", "sqlite", "oracle"],
     raises=com.OperationNotDefinedError,
 )
 @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError)
@@ -1561,13 +1357,7 @@ def test_day_of_week_scalar(con, date, expected_index, expected_day):
     assert result_day.lower() == expected_day.lower()


-@pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError)
-@pytest.mark.broken(
-    ["druid"],
-    raises=AttributeError,
-    reason="StringColumn' object has no attribute 'day_of_week'",
-)
-@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError)
+@pytest.mark.notimpl(["oracle", "exasol", "druid"], raises=com.OperationNotDefinedError)
 @pytest.mark.broken(
     ["risingwave"],
     raises=AssertionError,
@@ -1612,12 +1402,7 @@ def test_day_of_week_column(backend, alltypes, df):
         ),
     ],
 )
-@pytest.mark.notimpl(["oracle"], raises=com.OperationNotDefinedError)
-@pytest.mark.notimpl(
-    ["druid"],
-    raises=AttributeError,
-    reason="'StringColumn' object has no attribute 'day_of_week'",
-)
+@pytest.mark.notimpl(["oracle", "druid"], raises=com.OperationNotDefinedError)
 def test_day_of_week_column_group_by(
     backend, alltypes, df, day_of_week_expr, day_of_week_pandas
 ):
@@ -1686,10 +1471,8 @@ def test_today_from_projection(alltypes):


 @pytest.mark.notimpl(
-    ["pandas", "dask", "exasol", "risingwave"], raises=com.OperationNotDefinedError
-)
-@pytest.mark.notimpl(
-    ["druid"], raises=PyDruidProgrammingError, reason="SQL parse failed"
+    ["pandas", "dask", "exasol", "risingwave", "druid"],
+    raises=com.OperationNotDefinedError,
 )
 @pytest.mark.notimpl(
     ["oracle"], raises=OracleDatabaseError, reason="ORA-00936 missing expression"
@@ -1923,12 +1706,8 @@ def test_interval_literal(con, backend):


 @pytest.mark.notimpl(
-    ["pandas", "dask", "exasol", "risingwave"], raises=com.OperationNotDefinedError
-)
-@pytest.mark.broken(
-    ["druid"],
-    raises=AttributeError,
-    reason="'StringColumn' object has no attribute 'year'",
+    ["pandas", "dask", "exasol", "risingwave", "druid"],
+    raises=com.OperationNotDefinedError,
 )
 @pytest.mark.broken(
     ["oracle"], raises=OracleDatabaseError, reason="ORA-00936: missing expression"
@@ -1947,11 +1726,6 @@ def test_date_column_from_ymd(backend, con, alltypes, df):
     ["pandas", "dask", "pyspark", "mysql", "exasol"],
     raises=com.OperationNotDefinedError,
 )
-@pytest.mark.broken(
-    ["druid"],
-    raises=AttributeError,
-    reason="StringColumn' object has no attribute 'year'",
-)
 @pytest.mark.notyet(["impala", "oracle"], raises=com.OperationNotDefinedError)
 def 
test_timestamp_column_from_ymdhms(backend, con, alltypes, df): c = alltypes.timestamp_col @@ -2285,16 +2059,12 @@ def test_delta(con, start, end, unit, expected): "pyspark", "sqlite", "trino", + "druid", ], raises=com.OperationNotDefinedError, ) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="Druid tests load timestamp_col as a string currently", -) @pytest.mark.parametrize( - "kws, pd_freq", + ("kws", "pd_freq"), [ param( {"milliseconds": 50}, @@ -2403,14 +2173,10 @@ def test_timestamp_bucket(backend, kws, pd_freq): "pyspark", "sqlite", "trino", + "druid", ], raises=com.OperationNotDefinedError, ) -@pytest.mark.broken( - ["druid"], - raises=AttributeError, - reason="Druid tests load timestamp_col as a string currently", -) @pytest.mark.notimpl( ["clickhouse", "mssql", "snowflake"], reason="offset arg not supported",