Skip to content

Commit

Permalink
Part of Bears-R-Us#3951: Read multiple row groups in Parquet files correctly
Browse files Browse the repository at this point in the history
  • Loading branch information
ajpotts committed Dec 27, 2024
1 parent 1976c87 commit 8a2a40a
Showing 1 changed file with 27 additions and 1 deletion.
28 changes: 27 additions & 1 deletion tests/io_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1746,7 +1746,6 @@ def test_unsanitized_dataset_names(self, hdf_test_base_tmp):
ak.to_hdf(my_arrays, f"{tmp_dirname}/bad_dataset_names")
ak.read_hdf(f"{tmp_dirname}/bad_dataset_names*")


def test_hdf_groupby(self, hdf_test_base_tmp):
# test for categorical and multiple keys
string = ak.array(["a", "b", "a", "b", "c"])
Expand Down Expand Up @@ -2404,6 +2403,33 @@ def test_export_parquet(self, import_export_base_tmp):
index=True,
)

def test_parquet_multiindex_dataframes(self, import_export_base_tmp):
    """Round-trip a MultiIndex DataFrame through a multi-row-group Parquet file.

    Regression test for Bears-R-Us#3951: arkouda must read every row group
    in a Parquet file, not just the first one.

    Parameters
    ----------
    import_export_base_tmp : str
        pytest fixture providing the base temp directory for IO tests.
    """
    with tempfile.TemporaryDirectory(dir=import_export_base_tmp) as tmp_dirname:
        file_name = f"{tmp_dirname}/multi_index.parquet"

        # Keep the data small; multiple row groups are forced explicitly
        # via row_group_size below instead of an enormous `size`.
        size = 10**4

        arrays = [
            (np.arange(size) // 2).tolist(),
            np.arange(size).tolist(),
        ]
        tuples = list(zip(*arrays))

        index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
        s = pd.Series(np.random.randn(size), index=index)

        df = s.to_frame()
        # row_group_size << size guarantees the file contains several
        # row groups, which is the condition #3951 mis-handled.
        df.to_parquet(file_name, row_group_size=size // 8)

        ak_df = ak.DataFrame(ak.read_parquet(file_name))
        round_trip_df = ak_df.to_pandas()

        # pandas names the lone data column 0; arkouda stringifies it to "0".
        # All rows (from every row group) must survive the round trip.
        assert len(round_trip_df) == size
        np.testing.assert_allclose(
            df.loc[:, 0].values, round_trip_df.loc[:, "0"].values
        )


class TestZarr:

Expand Down

0 comments on commit 8a2a40a

Please sign in to comment.