Skip to content

Commit

Permalink
rename margin cache hive columns
Browse files Browse the repository at this point in the history
  • Loading branch information
smcguire-cmu committed Jan 9, 2024
1 parent 0ba5bcb commit 0b06a1c
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 2 deletions.
16 changes: 14 additions & 2 deletions src/hipscat_import/margin_cache/margin_cache_map_reduce.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import healpy as hp
import numpy as np
import pyarrow.dataset as ds
from hipscat import pixel_math
from hipscat.catalog.partition_info import PartitionInfo
from hipscat.io import file_io, paths

# pylint: disable=too-many-locals,too-many-arguments
Expand Down Expand Up @@ -63,13 +65,23 @@ def _to_pixel_shard(data, margin_threshold, output_path, ra_column, dec_column):

final_df = margin_data.drop(
columns=[
"partition_order",
"partition_pixel",
"margin_check",
"margin_pixel",
]
).rename(
columns={
PartitionInfo.METADATA_ORDER_COLUMN_NAME: f"margin_{PartitionInfo.METADATA_ORDER_COLUMN_NAME}",
PartitionInfo.METADATA_DIR_COLUMN_NAME: f"margin_{PartitionInfo.METADATA_DIR_COLUMN_NAME}",
PartitionInfo.METADATA_PIXEL_COLUMN_NAME: f"margin_{PartitionInfo.METADATA_PIXEL_COLUMN_NAME}",
"partition_order": PartitionInfo.METADATA_ORDER_COLUMN_NAME,
"partition_pixel": PartitionInfo.METADATA_PIXEL_COLUMN_NAME
}
)

dir_column = np.floor_divide(final_df[PartitionInfo.METADATA_PIXEL_COLUMN_NAME].values, 10000) * 10000

final_df[PartitionInfo.METADATA_DIR_COLUMN_NAME] = dir_column

final_df.to_parquet(shard_path)

del data, margin_data, final_df
Expand Down
5 changes: 5 additions & 0 deletions tests/hipscat_import/margin_cache/test_margin_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy.testing as npt
import pandas as pd
import pytest
from hipscat.catalog import PartitionInfo
from hipscat.catalog.dataset.dataset import Dataset
from hipscat.io import file_io, paths

Expand Down Expand Up @@ -37,6 +38,10 @@ def test_margin_cache_gen(small_sky_source_catalog, tmp_path, dask_client):

assert len(data) == 13

assert all(data[PartitionInfo.METADATA_ORDER_COLUMN_NAME] == norder)
assert all(data[PartitionInfo.METADATA_PIXEL_COLUMN_NAME] == npix)
assert all(data[PartitionInfo.METADATA_DIR_COLUMN_NAME] == int(npix / 10000) * 10000)

catalog = Dataset.read_from_hipscat(args.catalog_path)
assert catalog.on_disk
assert catalog.catalog_path == args.catalog_path
Expand Down

0 comments on commit 0b06a1c

Please sign in to comment.