Skip to content

Commit

Permalink
Part of Bears-R-Us#3951: Read multiple row groups in Parquet files correctly
Browse files Browse the repository at this point in the history
  • Loading branch information
ajpotts committed Dec 27, 2024
1 parent 1976c87 commit 8a2a40a
Showing 1 changed file with 27 additions and 1 deletion.
28 changes: 27 additions & 1 deletion tests/io_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1746,7 +1746,6 @@ def test_unsanitized_dataset_names(self, hdf_test_base_tmp):
ak.to_hdf(my_arrays, f"{tmp_dirname}/bad_dataset_names")
ak.read_hdf(f"{tmp_dirname}/bad_dataset_names*")


def test_hdf_groupby(self, hdf_test_base_tmp):
# test for categorical and multiple keys
string = ak.array(["a", "b", "a", "b", "c"])
Expand Down Expand Up @@ -2404,6 +2403,33 @@ def test_export_parquet(self, import_export_base_tmp):
index=True,
)

def test_parquet_multiindex_dataframes(self, import_export_base_tmp):
    """Round-trip a MultiIndex DataFrame through a multi-row-group Parquet file.

    Regression test for Bears-R-Us#3951: arkouda must read every row group
    in a Parquet file, not just the first one.

    Parameters
    ----------
    import_export_base_tmp : str
        pytest fixture providing the base temp directory for IO tests.
    """
    with tempfile.TemporaryDirectory(dir=import_export_base_tmp) as tmp_dirname:
        file_name = f"{tmp_dirname}/multi_index.parquet"

        # Keep the data small; multiple row groups are forced explicitly
        # via row_group_size below instead of an enormous `size`.
        size = 10**4

        arrays = [
            (np.arange(size) // 2).tolist(),
            np.arange(size).tolist(),
        ]
        tuples = list(zip(*arrays))

        index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
        s = pd.Series(np.random.randn(size), index=index)

        df = s.to_frame()
        # row_group_size << size guarantees the file contains several
        # row groups, which is the condition #3951 mis-handled.
        df.to_parquet(file_name, row_group_size=size // 8)

        ak_df = ak.DataFrame(ak.read_parquet(file_name))
        round_trip_df = ak_df.to_pandas()

        # pandas names the lone data column 0; arkouda stringifies it to "0".
        # All rows (from every row group) must survive the round trip.
        assert len(round_trip_df) == size
        np.testing.assert_allclose(
            df.loc[:, 0].values, round_trip_df.loc[:, "0"].values
        )


class TestZarr:

Expand Down

0 comments on commit 8a2a40a

Please sign in to comment.