Skip to content

chore: Migrate up to 15 scalar operators to SQLGlot #1938

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions bigframes/core/compile/sqlglot/expressions/unary_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,26 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
return sge.Floor(this=expr.expr)


@UNARY_OP_REGISTRATION.register(ops.geo_area_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``geo_area_op`` into an ``ST_AREA(<operand>)`` function call."""
    operand = expr.expr
    return sge.func("ST_AREA", operand)


@UNARY_OP_REGISTRATION.register(ops.geo_st_astext_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``geo_st_astext_op`` into an ``ST_ASTEXT(<operand>)`` call."""
    operand = expr.expr
    return sge.func("ST_ASTEXT", operand)


@UNARY_OP_REGISTRATION.register(ops.geo_x_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``geo_x_op`` into a ``SAFE.ST_X(<operand>)`` function call."""
    operand = expr.expr
    return sge.func("SAFE.ST_X", operand)


@UNARY_OP_REGISTRATION.register(ops.geo_y_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``geo_y_op`` into a ``SAFE.ST_Y(<operand>)`` function call."""
    operand = expr.expr
    return sge.func("SAFE.ST_Y", operand)


@UNARY_OP_REGISTRATION.register(ops.hash_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``hash_op`` into a ``FARM_FINGERPRINT(<operand>)`` call."""
    operand = expr.expr
    return sge.func("FARM_FINGERPRINT", operand)
Expand Down Expand Up @@ -361,6 +381,16 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
return sge.Lower(this=expr.expr)


@UNARY_OP_REGISTRATION.register(ops.minute_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``minute_op`` into ``EXTRACT(MINUTE FROM <operand>)``."""
    date_part = sge.Identifier(this="MINUTE")
    return sge.Extract(this=date_part, expression=expr.expr)


@UNARY_OP_REGISTRATION.register(ops.month_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``month_op`` into ``EXTRACT(MONTH FROM <operand>)``."""
    date_part = sge.Identifier(this="MONTH")
    return sge.Extract(this=date_part, expression=expr.expr)


@UNARY_OP_REGISTRATION.register(ops.StrLstripOp)
def _(op: ops.StrLstripOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``StrLstripOp`` into a left-sided ``TRIM`` of the operand.

    ``op.to_strip`` is converted to a SQL literal and passed as the trim
    expression.
    """
    strip_literal = sge.convert(op.to_strip)
    return sge.Trim(this=expr.expr, expression=strip_literal, side="LEFT")
Expand All @@ -371,16 +401,31 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
return sge.Neg(this=expr.expr)


@UNARY_OP_REGISTRATION.register(ops.normalize_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``normalize_op`` into a timestamp truncation to ``DAY``."""
    day_unit = sge.Identifier(this="DAY")
    return sge.TimestampTrunc(this=expr.expr, unit=day_unit)


@UNARY_OP_REGISTRATION.register(ops.pos_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``pos_op`` by returning the operand expression unchanged."""
    unchanged = expr.expr
    return unchanged


@UNARY_OP_REGISTRATION.register(ops.quarter_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``quarter_op`` into ``EXTRACT(QUARTER FROM <operand>)``."""
    date_part = sge.Identifier(this="QUARTER")
    return sge.Extract(this=date_part, expression=expr.expr)


@UNARY_OP_REGISTRATION.register(ops.reverse_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``reverse_op`` into a ``REVERSE(<operand>)`` function call."""
    operand = expr.expr
    return sge.func("REVERSE", operand)


@UNARY_OP_REGISTRATION.register(ops.second_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``second_op`` into ``EXTRACT(SECOND FROM <operand>)``."""
    date_part = sge.Identifier(this="SECOND")
    return sge.Extract(this=date_part, expression=expr.expr)


@UNARY_OP_REGISTRATION.register(ops.StrRstripOp)
def _(op: ops.StrRstripOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``StrRstripOp`` into a right-sided ``TRIM`` of the operand.

    ``op.to_strip`` is converted to a SQL literal and passed as the trim
    expression.
    """
    strip_literal = sge.convert(op.to_strip)
    return sge.Trim(this=expr.expr, expression=strip_literal, side="RIGHT")
Expand Down Expand Up @@ -414,6 +459,11 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
return sge.Extract(this=sge.Identifier(this="ISOWEEK"), expression=expr.expr)


@UNARY_OP_REGISTRATION.register(ops.iso_year_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``iso_year_op`` into ``EXTRACT(ISOYEAR FROM <operand>)``."""
    date_part = sge.Identifier(this="ISOYEAR")
    return sge.Extract(this=date_part, expression=expr.expr)


@UNARY_OP_REGISTRATION.register(ops.isnull_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``isnull_op`` into an ``<operand> IS NULL`` predicate."""
    null_literal = sge.Null()
    return sge.Is(this=expr.expr, expression=null_literal)
Expand Down Expand Up @@ -442,6 +492,31 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
)


@UNARY_OP_REGISTRATION.register(ops.StrGetOp)
def _(op: ops.StrGetOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``StrGetOp`` (one character at index ``op.i``) into ``SUBSTRING``.

    Args:
        op: The op carrying the zero-based, possibly negative, index ``i``.
        expr: The typed operand whose ``expr`` is the string SQL expression.

    Returns:
        A ``SUBSTRING`` expression selecting at most one character.

    For ``i >= 0`` the zero-based index is shifted to SQL's one-based
    position. For ``i < 0`` the index is passed through unshifted, because
    BigQuery's ``SUBSTR`` counts negative positions from the end (-1 is the
    last character); adding 1 would turn ``-1`` into position 0, which
    BigQuery treats as the first character.
    """
    if op.i >= 0:
        return sge.Substring(
            this=expr.expr,
            start=sge.convert(op.i + 1),
            length=sge.convert(1),
        )

    # BigQuery clamps a position before the start of the string to 1, which
    # would return the first character for an out-of-range negative index.
    # Request zero characters in that case so the result is empty, matching
    # what an out-of-range non-negative index already produces.
    index_in_range = sge.GTE(
        this=sge.Length(this=expr.expr.copy()),
        expression=sge.convert(-op.i),
    )
    guarded_length = sge.If(
        this=index_in_range,
        true=sge.convert(1),
        false=sge.convert(0),
    )
    return sge.Substring(
        this=expr.expr,
        start=sge.convert(op.i),
        length=guarded_length,
    )


@UNARY_OP_REGISTRATION.register(ops.StrSliceOp)
def _(op: ops.StrSliceOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``StrSliceOp`` (Python-style ``s[start:end]``) into ``SUBSTRING``.

    Args:
        op: The op carrying optional zero-based ``start`` / ``end`` bounds,
            either of which may be ``None`` or negative.
        expr: The typed operand whose ``expr`` is the string SQL expression.

    Returns:
        A ``SUBSTRING(value, start[, length])`` expression.

    Notes:
        * A start position is always emitted. Omitting it while passing a
          length would render ``SUBSTRING(value, length)``, which SQL reads
          as "from position ``length``" rather than "first ``length``
          characters".
        * The length is clamped at 0, since BigQuery's ``SUBSTR`` raises an
          error for a negative length whereas a Python slice with
          ``end < start`` is simply empty.
        * Negative bounds are resolved against ``LENGTH(value)`` and clamped
          at 0, mirroring Python slice semantics.
    """
    value = expr.expr
    begin = 0 if op.start is None else op.start
    end = op.end

    def _offset(index: int) -> sge.Expression:
        """Return the zero-based offset for *index* as a SQL expression."""
        if index >= 0:
            return sge.convert(index)
        # Negative bound: count from the end, never going before offset 0.
        from_end = sge.Sub(
            this=sge.Length(this=value.copy()),
            expression=sge.convert(-index),
        )
        return sge.Greatest(this=from_end, expressions=[sge.convert(0)])

    # SQL positions are one-based, Python offsets are zero-based.
    if begin >= 0:
        start_sql: sge.Expression = sge.convert(begin + 1)
    else:
        start_sql = sge.Add(this=_offset(begin), expression=sge.convert(1))

    if end is None:
        length_sql = None
    elif begin >= 0 and end >= 0:
        # Both bounds are known literals: fold the arithmetic at compile time.
        length_sql = sge.convert(max(end - begin, 0))
    else:
        span = sge.Sub(this=_offset(end), expression=_offset(begin))
        length_sql = sge.Greatest(this=span, expressions=[sge.convert(0)])

    return sge.Substring(this=value, start=start_sql, length=length_sql)


@UNARY_OP_REGISTRATION.register(ops.tan_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``tan_op`` into a ``TAN(<operand>)`` function call."""
    operand = expr.expr
    return sge.func("TAN", operand)
Expand All @@ -452,6 +527,16 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
return sge.func("TANH", expr.expr)


@UNARY_OP_REGISTRATION.register(ops.time_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``time_op`` into a ``TIME(<operand>)`` function call."""
    operand = expr.expr
    return sge.func("TIME", operand)


@UNARY_OP_REGISTRATION.register(ops.timedelta_floor_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``timedelta_floor_op`` into ``FLOOR(<operand>)``."""
    operand = expr.expr
    return sge.Floor(this=operand)


# JSON Ops
@UNARY_OP_REGISTRATION.register(ops.JSONExtract)
def _(op: ops.JSONExtract, expr: TypedExpr) -> sge.Expression:
Expand Down Expand Up @@ -501,3 +586,8 @@ def _(op: ops.ToJSONString, expr: TypedExpr) -> sge.Expression:
@UNARY_OP_REGISTRATION.register(ops.upper_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``upper_op`` into an ``UPPER(<operand>)`` expression."""
    operand = expr.expr
    return sge.Upper(this=operand)


@UNARY_OP_REGISTRATION.register(ops.year_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``year_op`` into ``EXTRACT(YEAR FROM <operand>)``."""
    date_part = sge.Identifier(this="YEAR")
    return sge.Extract(this=date_part, expression=expr.expr)
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`geography_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
ST_AREA(`bfcol_0`) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `geography_col`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`geography_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
ST_ASTEXT(`bfcol_0`) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `geography_col`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`geography_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
SAFE.ST_X(`bfcol_0`) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `geography_col`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`geography_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
SAFE.ST_Y(`bfcol_0`) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `geography_col`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`timestamp_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
EXTRACT(ISOYEAR FROM `bfcol_0`) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `timestamp_col`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`timestamp_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
EXTRACT(MINUTE FROM `bfcol_0`) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `timestamp_col`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`timestamp_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
EXTRACT(MONTH FROM `bfcol_0`) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `timestamp_col`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`timestamp_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
TIMESTAMP_TRUNC(`bfcol_0`, DAY) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `timestamp_col`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`timestamp_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
EXTRACT(QUARTER FROM `bfcol_0`) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `timestamp_col`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`timestamp_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
EXTRACT(SECOND FROM `bfcol_0`) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `timestamp_col`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`string_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
SUBSTRING(`bfcol_0`, 2, 1) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `string_col`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`string_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
SUBSTRING(`bfcol_0`, 2, 2) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `string_col`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`timestamp_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
TIME(`bfcol_0`) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `timestamp_col`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
FLOOR(`bfcol_0`) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `int64_col`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`timestamp_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
EXTRACT(YEAR FROM `bfcol_0`) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `timestamp_col`
FROM `bfcte_1`
Loading