Skip to content

Commit

Permalink
fix compute / feather tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
jorisvandenbossche committed Nov 13, 2024
1 parent 136b091 commit e5db09f
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 10 deletions.
19 changes: 10 additions & 9 deletions python/pyarrow/tests/test_compute.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1020,7 +1020,7 @@ def test_replace_slice():
offsets = range(-3, 4)

arr = pa.array([None, '', 'a', 'ab', 'abc', 'abcd', 'abcde'])
series = arr.to_pandas()
series = arr.to_pandas().astype(object).replace({np.nan: None})
for start in offsets:
for stop in offsets:
expected = series.str.slice_replace(start, stop, 'XX')
Expand All @@ -1031,7 +1031,7 @@ def test_replace_slice():
assert pc.binary_replace_slice(arr, start, stop, 'XX') == actual

arr = pa.array([None, '', 'π', 'πb', 'πbθ', 'πbθd', 'πbθde'])
series = arr.to_pandas()
series = arr.to_pandas().astype(object).replace({np.nan: None})
for start in offsets:
for stop in offsets:
expected = series.str.slice_replace(start, stop, 'XX')
Expand Down Expand Up @@ -2125,50 +2125,51 @@ def test_strftime():
for fmt in formats:
options = pc.StrftimeOptions(fmt)
result = pc.strftime(tsa, options=options)
expected = pa.array(ts.strftime(fmt))
# cast to the same type as result to ignore string vs large_string
expected = pa.array(ts.strftime(fmt)).cast(result.type)
assert result.equals(expected)

fmt = "%Y-%m-%dT%H:%M:%S"

# Default format
tsa = pa.array(ts, type=pa.timestamp("s", timezone))
result = pc.strftime(tsa, options=pc.StrftimeOptions())
expected = pa.array(ts.strftime(fmt))
expected = pa.array(ts.strftime(fmt)).cast(result.type)
assert result.equals(expected)

# Default format plus timezone
tsa = pa.array(ts, type=pa.timestamp("s", timezone))
result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z"))
expected = pa.array(ts.strftime(fmt + "%Z"))
expected = pa.array(ts.strftime(fmt + "%Z")).cast(result.type)
assert result.equals(expected)

# Pandas %S is equivalent to %S in arrow for unit="s"
tsa = pa.array(ts, type=pa.timestamp("s", timezone))
options = pc.StrftimeOptions("%S")
result = pc.strftime(tsa, options=options)
expected = pa.array(ts.strftime("%S"))
expected = pa.array(ts.strftime("%S")).cast(result.type)
assert result.equals(expected)

# Pandas %S.%f is equivalent to %S in arrow for unit="us"
tsa = pa.array(ts, type=pa.timestamp("us", timezone))
options = pc.StrftimeOptions("%S")
result = pc.strftime(tsa, options=options)
expected = pa.array(ts.strftime("%S.%f"))
expected = pa.array(ts.strftime("%S.%f")).cast(result.type)
assert result.equals(expected)

# Test setting locale
tsa = pa.array(ts, type=pa.timestamp("s", timezone))
options = pc.StrftimeOptions(fmt, locale="C")
result = pc.strftime(tsa, options=options)
expected = pa.array(ts.strftime(fmt))
expected = pa.array(ts.strftime(fmt)).cast(result.type)
assert result.equals(expected)

# Test timestamps without timezone
fmt = "%Y-%m-%dT%H:%M:%S"
ts = pd.to_datetime(times)
tsa = pa.array(ts, type=pa.timestamp("s"))
result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt))
expected = pa.array(ts.strftime(fmt))
expected = pa.array(ts.strftime(fmt)).cast(result.type)

# Positional format
assert pc.strftime(tsa, fmt) == result
Expand Down
6 changes: 5 additions & 1 deletion python/pyarrow/tests/test_feather.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -426,7 +426,11 @@ def test_empty_strings(version):
@pytest.mark.pandas
def test_all_none(version):
df = pd.DataFrame({'all_none': [None] * 10})
_check_pandas_roundtrip(df, version=version)
if version == 1:
expected = df.astype("str")
else:
expected = df
_check_pandas_roundtrip(df, version=version, expected=expected)


@pytest.mark.pandas
Expand Down

0 comments on commit e5db09f

Please sign in to comment.