|
1 | | -import pytest |
2 | 1 | import pandas as pd |
| 2 | +import pytest |
3 | 3 |
|
4 | 4 | from ..utils_test import cluster_memory, timeseries_of_size, wait |
5 | 5 |
|
6 | 6 |
|
7 | | -@pytest.mark.skipif() |
8 | | -def test_unique(small_client): |
9 | | - """Find unique values""" |
10 | | - memory = cluster_memory(small_client) |
11 | | - df = timeseries_of_size(memory) |
12 | | - s = df.name.astype(pd.StringDtype("pyarrow")).persist() |
13 | | - result = s.unique() |
14 | | - wait(result, small_client, 10 * 60) |
15 | | - |
16 | | - |
17 | | -def test_contains(small_client): |
18 | | - """String contains""" |
19 | | - memory = cluster_memory(small_client) |
20 | | - df = timeseries_of_size(memory) |
21 | | - s = df.name.astype(pd.StringDtype("pyarrow")).persist() |
22 | | - result = s.str.contains("a") |
23 | | - wait(result, small_client, 10 * 60) |
24 | | - |
25 | | - |
26 | | -def test_startswith(small_client): |
27 | | - """String starts with""" |
| 7 | +@pytest.fixture(params=[True, False]) |
| 8 | +def series_with_client(request, small_client): |
28 | 9 | memory = cluster_memory(small_client) |
29 | 10 | df = timeseries_of_size(memory) |
30 | | - s = df.name.astype(pd.StringDtype("pyarrow")).persist() |
31 | | - result = s.str.startswith("B") |
32 | | - wait(result, small_client, 10 * 60) |
33 | | - |
| 11 | + if request.param: |
| 12 | + series = df.name.astype(pd.StringDtype("pyarrow")) |
| 13 | + series = series.persist() |
| 14 | + yield series, small_client |
34 | 15 |
|
35 | | -def test_filter(small_client): |
36 | | - """How fast can we filter a DataFrame?""" |
37 | | - memory = cluster_memory(small_client) |
38 | | - df = timeseries_of_size(memory) |
39 | | - df.name = df.name.astype(pd.StringDtype("pyarrow")) |
40 | | - df = df.persist() |
41 | | - name = df.head(1).name.iloc[0] # Get first name that appears |
42 | | - result = df[df.name == name] |
43 | | - wait(result, small_client, 10 * 60) |
44 | 16 |
|
45 | | - |
46 | | -def test_value_counts(small_client): |
47 | | - """Value counts on string values""" |
48 | | - memory = cluster_memory(small_client) |
49 | | - df = timeseries_of_size(memory) |
50 | | - s = df.name.astype(pd.StringDtype("pyarrow")).persist() |
51 | | - result = s.value_counts() |
52 | | - wait(result, small_client, 10 * 60) |
| 17 | +def test_unique(series_with_client): |
| 18 | + """Find unique values""" |
| 19 | + series, client = series_with_client |
| 20 | + result = series.unique() |
| 21 | + wait(result, client, 10 * 60) |
| 22 | + |
| 23 | + |
| 24 | +# def test_contains(small_client): |
| 25 | +# """String contains""" |
| 26 | +# memory = cluster_memory(small_client) |
| 27 | +# df = timeseries_of_size(memory) |
| 28 | +# s = df.name.astype(pd.StringDtype("pyarrow")).persist() |
| 29 | +# result = s.str.contains("a") |
| 30 | +# wait(result, small_client, 10 * 60) |
| 31 | +# |
| 32 | +# |
| 33 | +# def test_startswith(small_client): |
| 34 | +# """String starts with""" |
| 35 | +# memory = cluster_memory(small_client) |
| 36 | +# df = timeseries_of_size(memory) |
| 37 | +# s = df.name.astype(pd.StringDtype("pyarrow")).persist() |
| 38 | +# result = s.str.startswith("B") |
| 39 | +# wait(result, small_client, 10 * 60) |
| 40 | +# |
| 41 | +# |
| 42 | +# def test_filter(small_client): |
| 43 | +# """How fast can we filter a DataFrame?""" |
| 44 | +# memory = cluster_memory(small_client) |
| 45 | +# df = timeseries_of_size(memory) |
| 46 | +# df.name = df.name.astype(pd.StringDtype("pyarrow")) |
| 47 | +# df = df.persist() |
| 48 | +# name = df.head(1).name.iloc[0] # Get first name that appears |
| 49 | +# result = df[df.name == name] |
| 50 | +# wait(result, small_client, 10 * 60) |
| 51 | +# |
| 52 | +# |
| 53 | +# def test_value_counts(small_client): |
| 54 | +# """Value counts on string values""" |
| 55 | +# memory = cluster_memory(small_client) |
| 56 | +# df = timeseries_of_size(memory) |
| 57 | +# s = df.name.astype(pd.StringDtype("pyarrow")).persist() |
| 58 | +# result = s.value_counts() |
| 59 | +# wait(result, small_client, 10 * 60) |
0 commit comments