diff --git a/src/hats_import/catalog/map_reduce.py b/src/hats_import/catalog/map_reduce.py index ec86615..d8d15cf 100644 --- a/src/hats_import/catalog/map_reduce.py +++ b/src/hats_import/catalog/map_reduce.py @@ -187,7 +187,9 @@ def split_pixels( filtered_data = data.iloc[unique_inverse == unique_index] if _has_named_index(filtered_data): filtered_data = filtered_data.reset_index() - filtered_data = pa.Table.from_pandas(filtered_data, preserve_index=False) + filtered_data = pa.Table.from_pandas( + filtered_data, preserve_index=False + ).replace_schema_metadata() else: filtered_data = data.filter(unique_inverse == unique_index) diff --git a/tests/data/small_sky_object_catalog/dataset/Norder=0/Dir=0/Npix=11.parquet b/tests/data/small_sky_object_catalog/dataset/Norder=0/Dir=0/Npix=11.parquet index d3217ae..26f5bb8 100644 Binary files a/tests/data/small_sky_object_catalog/dataset/Norder=0/Dir=0/Npix=11.parquet and b/tests/data/small_sky_object_catalog/dataset/Norder=0/Dir=0/Npix=11.parquet differ diff --git a/tests/data/small_sky_object_catalog/dataset/_common_metadata b/tests/data/small_sky_object_catalog/dataset/_common_metadata index ba86110..2e494c1 100644 Binary files a/tests/data/small_sky_object_catalog/dataset/_common_metadata and b/tests/data/small_sky_object_catalog/dataset/_common_metadata differ diff --git a/tests/data/small_sky_object_catalog/dataset/_metadata b/tests/data/small_sky_object_catalog/dataset/_metadata index 4b2440b..273da9e 100644 Binary files a/tests/data/small_sky_object_catalog/dataset/_metadata and b/tests/data/small_sky_object_catalog/dataset/_metadata differ diff --git a/tests/data/small_sky_object_catalog/point_map.fits b/tests/data/small_sky_object_catalog/point_map.fits index 1b6b629..f01050a 100644 Binary files a/tests/data/small_sky_object_catalog/point_map.fits and b/tests/data/small_sky_object_catalog/point_map.fits differ diff --git a/tests/data/small_sky_object_catalog/properties b/tests/data/small_sky_object_catalog/properties index 55d467d..fb1a376 100644 --- a/tests/data/small_sky_object_catalog/properties +++ b/tests/data/small_sky_object_catalog/properties @@ -7,8 +7,8 @@ hats_col_dec=dec hats_max_rows=1000000 hats_order=0 moc_sky_fraction=0.08333 -hats_builder=hats-import v0.4.1.dev2+gaeb92ae -hats_creation_date=2024-10-21T13\:22UTC -hats_estsize=70 +hats_builder=hats-import v0.4.2.dev1+g57aaa9d +hats_creation_date=2024-11-07T15\:20UTC +hats_estsize=113 hats_release_date=2024-09-18 hats_version=v0.1 diff --git a/tests/data/small_sky_source_catalog/dataset/Norder=0/Dir=0/Npix=4.parquet b/tests/data/small_sky_source_catalog/dataset/Norder=0/Dir=0/Npix=4.parquet index 3f82fdf..160167a 100644 Binary files a/tests/data/small_sky_source_catalog/dataset/Norder=0/Dir=0/Npix=4.parquet and b/tests/data/small_sky_source_catalog/dataset/Norder=0/Dir=0/Npix=4.parquet differ diff --git a/tests/data/small_sky_source_catalog/dataset/Norder=1/Dir=0/Npix=47.parquet b/tests/data/small_sky_source_catalog/dataset/Norder=1/Dir=0/Npix=47.parquet index aad2a92..6f35d24 100644 Binary files a/tests/data/small_sky_source_catalog/dataset/Norder=1/Dir=0/Npix=47.parquet and b/tests/data/small_sky_source_catalog/dataset/Norder=1/Dir=0/Npix=47.parquet differ diff --git a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=176.parquet b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=176.parquet index c415393..d7e098f 100644 Binary files a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=176.parquet and b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=176.parquet differ diff --git a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=177.parquet b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=177.parquet index 55462bf..807a5d7 100644 Binary files a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=177.parquet and b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=177.parquet differ diff --git a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=178.parquet b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=178.parquet index 5254a3d..10aa8f4 100644 Binary files a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=178.parquet and b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=178.parquet differ diff --git a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=179.parquet b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=179.parquet index b8011d3..2cd03b3 100644 Binary files a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=179.parquet and b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=179.parquet differ diff --git a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=180.parquet b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=180.parquet index 95f1fef..8e17974 100644 Binary files a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=180.parquet and b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=180.parquet differ diff --git a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=181.parquet b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=181.parquet index 37356aa..64e0920 100644 Binary files a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=181.parquet and b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=181.parquet differ diff --git a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=182.parquet b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=182.parquet index 276cf2c..3717359 100644 Binary files a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=182.parquet and b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=182.parquet differ diff --git a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=183.parquet b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=183.parquet index ccd2588..f2c11a4 100644 Binary files a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=183.parquet and b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=183.parquet differ diff --git a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=184.parquet b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=184.parquet index 6a7ced6..12a374a 100644 Binary files a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=184.parquet and b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=184.parquet differ diff --git a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=185.parquet b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=185.parquet index dbcbebd..6ae8abb 100644 Binary files a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=185.parquet and b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=185.parquet differ diff --git a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=186.parquet b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=186.parquet index bc6c417..5437a00 100644 Binary files a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=186.parquet and b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=186.parquet differ diff --git a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=187.parquet b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=187.parquet index d264fe6..e0ce400 100644 Binary files a/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=187.parquet and b/tests/data/small_sky_source_catalog/dataset/Norder=2/Dir=0/Npix=187.parquet differ diff --git a/tests/data/small_sky_source_catalog/dataset/_common_metadata b/tests/data/small_sky_source_catalog/dataset/_common_metadata index 319e050..c45c305 100644 Binary files a/tests/data/small_sky_source_catalog/dataset/_common_metadata and b/tests/data/small_sky_source_catalog/dataset/_common_metadata differ diff --git a/tests/data/small_sky_source_catalog/dataset/_metadata b/tests/data/small_sky_source_catalog/dataset/_metadata index cca27e7..867674b 100644 Binary files a/tests/data/small_sky_source_catalog/dataset/_metadata and b/tests/data/small_sky_source_catalog/dataset/_metadata differ diff --git a/tests/data/small_sky_source_catalog/point_map.fits b/tests/data/small_sky_source_catalog/point_map.fits index 4337a2c..6462b60 100644 Binary files a/tests/data/small_sky_source_catalog/point_map.fits and b/tests/data/small_sky_source_catalog/point_map.fits differ diff --git a/tests/data/small_sky_source_catalog/properties b/tests/data/small_sky_source_catalog/properties index d6d8704..33c9eb0 100644 --- a/tests/data/small_sky_source_catalog/properties +++ b/tests/data/small_sky_source_catalog/properties @@ -7,8 +7,8 @@ hats_col_dec=source_dec hats_max_rows=3000 hats_order=2 moc_sky_fraction=0.16667 -hats_builder=hats-import v0.4.1.dev2+gaeb92ae -hats_creation_date=2024-10-21T13\:22UTC -hats_estsize=1083 +hats_builder=hats-import v0.4.2.dev1+g57aaa9d +hats_creation_date=2024-11-07T15\:20UTC +hats_estsize=1086 hats_release_date=2024-09-18 hats_version=v0.1 diff --git a/tests/hats_import/catalog/test_run_import.py b/tests/hats_import/catalog/test_run_import.py index deea8ba..3070de7 100644 --- a/tests/hats_import/catalog/test_run_import.py +++ b/tests/hats_import/catalog/test_run_import.py @@ -290,9 +290,9 @@ def test_dask_runner( ] ) schema = pq.read_metadata(output_file).schema.to_arrow_schema() - assert schema.equals(expected_parquet_schema, check_metadata=False) + assert schema.equals(expected_parquet_schema) schema = pq.read_metadata(args.catalog_path / "dataset" / "_metadata").schema.to_arrow_schema() - assert schema.equals(expected_parquet_schema, check_metadata=False) + assert schema.equals(expected_parquet_schema) # Check that, when re-loaded as a pandas dataframe, the appropriate numeric types are used. data_frame = pd.read_parquet(output_file, engine="pyarrow") diff --git a/tests/hats_import/catalog/test_run_round_trip.py b/tests/hats_import/catalog/test_run_round_trip.py index b7d6b00..f21491a 100644 --- a/tests/hats_import/catalog/test_run_round_trip.py +++ b/tests/hats_import/catalog/test_run_round_trip.py @@ -116,9 +116,9 @@ def test_import_mixed_schema_csv( ] ) schema = pq.read_metadata(output_file).schema.to_arrow_schema() - assert schema.equals(expected_parquet_schema, check_metadata=False) + assert schema.equals(expected_parquet_schema) schema = pq.read_metadata(args.catalog_path / "dataset" / "_metadata").schema.to_arrow_schema() - assert schema.equals(expected_parquet_schema, check_metadata=False) + assert schema.equals(expected_parquet_schema) @pytest.mark.dask @@ -549,9 +549,9 @@ def test_import_pyarrow_types( ] ) schema = pq.read_metadata(output_file).schema.to_arrow_schema() - assert schema.equals(expected_parquet_schema, check_metadata=False) + assert schema.equals(expected_parquet_schema) schema = pq.read_metadata(args.catalog_path / "dataset" / "_metadata").schema.to_arrow_schema() - assert schema.equals(expected_parquet_schema, check_metadata=False) + assert schema.equals(expected_parquet_schema) class SimplePyarrowCsvReader(CsvReader): @@ -896,13 +896,13 @@ def test_gaia_ecsv( expected_parquet_schema = pq.read_metadata(schema_path).schema.to_arrow_schema() schema = pq.read_metadata(output_file).schema.to_arrow_schema() - assert schema.equals(expected_parquet_schema, check_metadata=False) + assert schema.equals(expected_parquet_schema) schema = pq.read_metadata(args.catalog_path / "dataset" / "_metadata").schema.to_arrow_schema() - assert schema.equals(expected_parquet_schema, check_metadata=False) + assert schema.equals(expected_parquet_schema) schema = pq.read_metadata(args.catalog_path / "dataset" / "_common_metadata").schema.to_arrow_schema() - assert schema.equals(expected_parquet_schema, check_metadata=False) + assert schema.equals(expected_parquet_schema) schema = pds.dataset(args.catalog_path, format="parquet").schema - assert schema.equals(expected_parquet_schema, check_metadata=False) + assert schema.equals(expected_parquet_schema) @pytest.mark.dask diff --git a/tests/hats_import/hipscat_conversion/test_run_conversion.py b/tests/hats_import/hipscat_conversion/test_run_conversion.py index 34a7642..1479a9e 100644 --- a/tests/hats_import/hipscat_conversion/test_run_conversion.py +++ b/tests/hats_import/hipscat_conversion/test_run_conversion.py @@ -70,13 +70,13 @@ def test_run_conversion_object( ] ) schema = pq.read_metadata(output_file).schema.to_arrow_schema() - assert schema.equals(expected_parquet_schema, check_metadata=False) + assert schema.equals(expected_parquet_schema) assert schema.metadata is None schema = pq.read_metadata(args.catalog_path / "dataset" / "_metadata").schema.to_arrow_schema() - assert schema.equals(expected_parquet_schema, check_metadata=False) + assert schema.equals(expected_parquet_schema) assert schema.metadata is None schema = pq.read_metadata(args.catalog_path / "dataset" / "_common_metadata").schema.to_arrow_schema() - assert schema.equals(expected_parquet_schema, check_metadata=False) + assert schema.equals(expected_parquet_schema) assert schema.metadata is None data = file_io.read_parquet_file_to_pandas( diff --git a/tests/hats_import/index/test_run_index.py b/tests/hats_import/index/test_run_index.py index 7a9bf29..f965649 100644 --- a/tests/hats_import/index/test_run_index.py +++ b/tests/hats_import/index/test_run_index.py @@ -57,13 +57,13 @@ def test_run_index( outfile = args.catalog_path / "dataset" / "index" / "part.0.parquet" schema = pq.read_metadata(outfile).schema.to_arrow_schema() - assert schema.equals(basic_index_parquet_schema, check_metadata=False) + assert schema.equals(basic_index_parquet_schema) schema = pq.read_metadata(args.catalog_path / "dataset" / "_metadata").schema.to_arrow_schema() - assert schema.equals(basic_index_parquet_schema, check_metadata=False) + assert schema.equals(basic_index_parquet_schema) schema = pq.read_metadata(args.catalog_path / "dataset" / "_common_metadata").schema.to_arrow_schema() - assert schema.equals(basic_index_parquet_schema, check_metadata=False) + assert schema.equals(basic_index_parquet_schema) @pytest.mark.dask @@ -104,13 +104,13 @@ def test_run_index_on_source( outfile = args.catalog_path / "dataset" / "index" / "part.0.parquet" schema = pq.read_metadata(outfile).schema.to_arrow_schema() - assert schema.equals(basic_index_parquet_schema, check_metadata=False) + assert schema.equals(basic_index_parquet_schema) schema = pq.read_metadata(args.catalog_path / "dataset" / "_metadata").schema.to_arrow_schema() - assert schema.equals(basic_index_parquet_schema, check_metadata=False) + assert schema.equals(basic_index_parquet_schema) schema = pq.read_metadata(args.catalog_path / "dataset" / "_common_metadata").schema.to_arrow_schema() - assert schema.equals(basic_index_parquet_schema, check_metadata=False) + assert schema.equals(basic_index_parquet_schema) @pytest.mark.dask @@ -148,7 +148,7 @@ def test_run_index_on_source_object_id( outfile = args.catalog_path / "dataset" / "index" / "part.0.parquet" schema = pq.read_metadata(outfile).schema.to_arrow_schema() - assert schema.equals(basic_index_parquet_schema, check_metadata=False) + assert schema.equals(basic_index_parquet_schema) id_range = np.arange(700, 831) ## Some of the objects have sources that span two source partitions. @@ -158,7 +158,7 @@ def test_run_index_on_source_object_id( assert_parquet_file_index(outfile, doubled_up) schema = pq.read_metadata(args.catalog_path / "dataset" / "_metadata").schema.to_arrow_schema() - assert schema.equals(basic_index_parquet_schema, check_metadata=False) + assert schema.equals(basic_index_parquet_schema) schema = pq.read_metadata(args.catalog_path / "dataset" / "_common_metadata").schema.to_arrow_schema() - assert schema.equals(basic_index_parquet_schema, check_metadata=False) + assert schema.equals(basic_index_parquet_schema) diff --git a/tests/hats_import/soap/test_run_soap.py b/tests/hats_import/soap/test_run_soap.py index 48b53d4..8153368 100644 --- a/tests/hats_import/soap/test_run_soap.py +++ b/tests/hats_import/soap/test_run_soap.py @@ -110,7 +110,7 @@ def test_object_to_source_with_leaves( pa.field("join_Npix", pa.uint64()), ] ) - assert parquet_file.metadata.schema.to_arrow_schema().equals(exepcted_schema, check_metadata=False) + assert parquet_file.metadata.schema.to_arrow_schema().equals(exepcted_schema) @pytest.mark.dask @@ -161,4 +161,4 @@ def test_object_to_source_with_leaves_drop_duplicates( pa.field("join_Npix", pa.uint64()), ] ) - assert parquet_file.metadata.schema.to_arrow_schema().equals(exepcted_schema, check_metadata=False) + assert parquet_file.metadata.schema.to_arrow_schema().equals(exepcted_schema)