Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat: add to_string method to SparkLikeExprDateTimeNamespace #1842

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
50 changes: 50 additions & 0 deletions narwhals/_spark_like/expr_dt.py
Original file line number Diff line number Diff line change
@@ -15,6 +15,56 @@ class SparkLikeExprDateTimeNamespace:
def __init__(self: Self, expr: SparkLikeExpr) -> None:
    """Bind this datetime namespace to the expression it operates on.

    Args:
        expr: The expression kept on the instance as ``_compliant_expr``.
    """
    self._compliant_expr = expr

def to_string(self: Self, format: str) -> SparkLikeExpr:  # noqa: A002
    """Render each datetime value as a string using a strftime-style format.

    Three families of format are handled:

    * ``"%G-W%V"`` and ``"%G-W%V-%u"`` are assembled by hand from the ISO
      week-year, the ISO week number and the ISO weekday.
    * ``"%Y-%m-%dT%H:%M:%S.%f"`` and ``"%Y-%m-%dT%H:%M:%S%.f"`` are assembled
      by hand so that six fractional digits are always emitted.
    * Any other format is translated directive by directive into a Java
      datetime pattern and handed to ``F.date_format``.

    Args:
        format: strftime-style format string.

    Returns:
        A new expression built through ``_from_call`` that yields the
        formatted strings.

    Raises:
        NotImplementedError: If ``format`` is not one of the special formats
            above and contains a ``%`` directive with no known Java pattern
            equivalent. Raised when the expression is built, not when it is
            evaluated.
    """
    import re

    # strftime directive -> Java datetime pattern fragment.
    directive_to_java = {
        "%Y": "yyyy",
        "%y": "yy",
        "%m": "MM",
        "%d": "dd",
        "%j": "DDD",
        "%H": "HH",
        "%I": "hh",
        "%M": "mm",
        "%S": "ss",
        "%p": "a",
        "%%": "%",
    }

    def _translate_token(match: "re.Match[str]") -> str:
        """Map one directive, quote, or run of letters to Java pattern text."""
        token = match.group(0)
        if token.startswith("%"):
            if token not in directive_to_java:
                msg = (
                    f"Format directive {token!r} in {format!r} is not "
                    "supported by `dt.to_string` for this backend."
                )
                raise NotImplementedError(msg)
            return directive_to_java[token]
        if token == "'":
            # A literal single quote is written as two quotes in Java patterns.
            return "''"
        # Bare ASCII letters are pattern symbols in Java; quote them so that
        # literal text (e.g. the "T" separator) is emitted verbatim.
        return f"'{token}'"

    def _iso_year(_input: Column) -> Column:
        """Return the ISO 8601 week-numbering year as a 4-character string."""
        year = F.year(_input)
        week = F.weekofyear(_input)
        month = F.month(_input)
        # Early-January days in week 52/53 belong to the previous ISO year;
        # late-December days in week 1 belong to the next one.
        iso_year = (
            F.when((week >= 52) & (month == 1), year - 1)
            .when((week == 1) & (month == 12), year + 1)
            .otherwise(year)
        )
        return F.lpad(iso_year.cast("string"), 4, "0")

    def _iso_week(_input: Column) -> Column:
        """Return the ISO week number as a zero-padded 2-character string."""
        return F.lpad(F.weekofyear(_input).cast("string"), 2, "0")

    def _format_iso_week(_input: Column) -> Column:
        """Format datetime as ISO week string."""
        return F.concat(_iso_year(_input), F.lit("-W"), _iso_week(_input))

    def _format_iso_week_with_day(_input: Column) -> Column:
        """Format datetime as ISO week string with day."""
        day = F.dayofweek(_input)
        # dayofweek counts Sunday as 1; ISO counts Monday as 1 and Sunday as 7.
        iso_day = F.when(day == 1, 7).otherwise(day - 1)
        return F.concat(
            _iso_year(_input),
            F.lit("-W"),
            _iso_week(_input),
            F.lit("-"),
            iso_day.cast("string"),
        )

    def _format_iso_datetime(_input: Column) -> Column:
        """Format datetime as ISO datetime with microseconds."""
        date_part = F.date_format(_input, "yyyy-MM-dd")
        time_part = F.date_format(_input, "HH:mm:ss")
        # The remainder is negative for timestamps before the epoch; shift it
        # into [0, 1_000_000) so the fraction is always six plain digits.
        micros = F.unix_micros(_input) % 1_000_000
        micros = (micros + 1_000_000) % 1_000_000
        micros_str = F.lpad(micros.cast("string"), 6, "0")
        return F.concat(date_part, F.lit("T"), time_part, F.lit("."), micros_str)

    special_formatters = {
        "%G-W%V": _format_iso_week,
        "%G-W%V-%u": _format_iso_week_with_day,
        "%Y-%m-%dT%H:%M:%S.%f": _format_iso_datetime,
        "%Y-%m-%dT%H:%M:%S%.f": _format_iso_datetime,
    }
    special = special_formatters.get(format)
    # Translate eagerly so an unsupported directive fails when the expression
    # is built rather than when it is evaluated.
    java_fmt = (
        None
        if special is not None
        else re.sub(r"%.?|'|[A-Za-z]+", _translate_token, format)
    )

    def _to_string(_input: Column) -> Column:
        if special is not None:
            return special(_input)
        return F.date_format(_input, java_fmt)

    return self._compliant_expr._from_call(
        _to_string,
        "to_string",
        returns_scalar=self._compliant_expr._returns_scalar,
    )

def date(self: Self) -> SparkLikeExpr:
return self._compliant_expr._from_call(
F.to_date,
1 change: 1 addition & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -159,6 +159,7 @@ def pyspark_lazy_constructor() -> Callable[[Any], IntoFrame]: # pragma: no cove
.config("spark.sql.shuffle.partitions", "2")
# common timezone for all tests environments
.config("spark.sql.session.timeZone", "UTC")
.config("spark.sql.legacy.timeParserPolicy", "LEGACY")
EdAbati marked this conversation as resolved.
Show resolved Hide resolved
.getOrCreate()
)

11 changes: 2 additions & 9 deletions tests/expr_and_series/dt/to_string_test.py
Original file line number Diff line number Diff line change
@@ -59,11 +59,7 @@ def test_dt_to_string_series(constructor_eager: ConstructorEager, fmt: str) -> N
],
)
@pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows")
def test_dt_to_string_expr(
constructor: Constructor, fmt: str, request: pytest.FixtureRequest
) -> None:
if "pyspark" in str(constructor):
request.applymarker(pytest.mark.xfail)
def test_dt_to_string_expr(constructor: Constructor, fmt: str) -> None:
input_frame = nw.from_native(constructor(data))

expected_col = [datetime.strftime(d, fmt) for d in data["a"]]
@@ -141,7 +137,7 @@ def test_dt_to_string_iso_local_datetime_expr(
expected: str,
request: pytest.FixtureRequest,
) -> None:
if ("pyspark" in str(constructor)) or "duckdb" in str(constructor):
if "duckdb" in str(constructor):
request.applymarker(pytest.mark.xfail)
df = constructor({"a": [data]})

@@ -178,10 +174,7 @@ def test_dt_to_string_iso_local_date_expr(
constructor: Constructor,
data: datetime,
expected: str,
request: pytest.FixtureRequest,
) -> None:
if "pyspark" in str(constructor):
request.applymarker(pytest.mark.xfail)
df = constructor({"a": [data]})
result = nw.from_native(df).with_columns(
nw.col("a").dt.to_string("%Y-%m-%d").alias("b")