From e7c9de6ab042403e07aa1727783085101932f2b0 Mon Sep 17 00:00:00 2001 From: "chris.ochoa" Date: Fri, 29 Mar 2024 17:44:52 +0000 Subject: [PATCH 1/2] feat: update dgp2wicker persistence to use local reduction --- dgp/contribs/dgp2wicker/dgp2wicker/ingest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dgp/contribs/dgp2wicker/dgp2wicker/ingest.py b/dgp/contribs/dgp2wicker/dgp2wicker/ingest.py index f4307a68..847ee5aa 100644 --- a/dgp/contribs/dgp2wicker/dgp2wicker/ingest.py +++ b/dgp/contribs/dgp2wicker/dgp2wicker/ingest.py @@ -587,4 +587,6 @@ def process_scene( wicker_dataset_version, wicker_dataset_schema, rdd, + local_reduction=True, + sort=True, ) From c92cf7e4a72147729964ab07133ad1ec8529cc5f Mon Sep 17 00:00:00 2001 From: "chris.ochoa" Date: Fri, 29 Mar 2024 21:38:54 +0000 Subject: [PATCH 2/2] feat: add skip_missing_data option to ingest_dgp_to_wicker --- dgp/contribs/dgp2wicker/dgp2wicker/ingest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dgp/contribs/dgp2wicker/dgp2wicker/ingest.py b/dgp/contribs/dgp2wicker/dgp2wicker/ingest.py index 847ee5aa..ce1eca68 100644 --- a/dgp/contribs/dgp2wicker/dgp2wicker/ingest.py +++ b/dgp/contribs/dgp2wicker/dgp2wicker/ingest.py @@ -311,6 +311,7 @@ def ingest_dgp_to_wicker( is_pd: bool = False, data_uri: str = None, alternate_scene_uri: str = None, + skip_missing_data: bool = False, ) -> Dict[str, int]: """Ingest DGP dataset into Wicker datastore @@ -420,6 +421,7 @@ def open_scene( dataset_kwargs = deepcopy(dataset_kwargs) dataset_kwargs['scene_json'] = os.path.join(local_path, scene_json) + dataset_kwargs['skip_missing_data'] = skip_missing_data is_pd = dataset_kwargs.pop('is_pd') @@ -524,6 +526,7 @@ def process_scene( ) dataset_kwargs['is_pd'] = is_pd + dataset_kwargs['skip_missing_data'] = skip_missing_data if pipeline is None: pipeline = []