From 7a6bd96b9df6141e183d2bd87b8037d4a20b22e8 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Mon, 9 Oct 2023 04:36:08 +0000 Subject: [PATCH 1/6] update archive_less_mature option to use int/None instead of bool to be in line with core --- .../dc_tools/odc/apps/dc_tools/azure_to_dc.py | 2 +- .../odc/apps/dc_tools/cop_dem_to_dc.py | 2 +- .../odc/apps/dc_tools/esa_worldcover_to_dc.py | 4 +-- .../odc/apps/dc_tools/stac_api_to_dc.py | 4 +-- apps/dc_tools/odc/apps/dc_tools/utils.py | 29 ++++++++++++++----- 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/apps/dc_tools/odc/apps/dc_tools/azure_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/azure_to_dc.py index 2fb90f7ab..5b220df93 100644 --- a/apps/dc_tools/odc/apps/dc_tools/azure_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/azure_to_dc.py @@ -102,7 +102,7 @@ def cli( allow_unsafe: bool, stac: bool, statsd_setting: str, - archive_less_mature: bool, + archive_less_mature: int, publish_action: str, account_url: str, container_name: str, diff --git a/apps/dc_tools/odc/apps/dc_tools/cop_dem_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/cop_dem_to_dc.py index 3a9cc06e5..0acd4b788 100644 --- a/apps/dc_tools/odc/apps/dc_tools/cop_dem_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/cop_dem_to_dc.py @@ -107,7 +107,7 @@ def process_uri_tile( dc: Datacube, doc2ds: Doc2Dataset, update_if_exists: bool = True, - archive_less_mature: bool = False, + archive_less_mature: int = None, publish_action: str = None, ) -> Tuple[pystac.Item, str]: product_name = f"dem_{product}" diff --git a/apps/dc_tools/odc/apps/dc_tools/esa_worldcover_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/esa_worldcover_to_dc.py index a1bd75286..8474bc410 100644 --- a/apps/dc_tools/odc/apps/dc_tools/esa_worldcover_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/esa_worldcover_to_dc.py @@ -117,7 +117,7 @@ def process_uri_tile( dc: Datacube, doc2ds: Doc2Dataset, update_if_exists: bool = True, - archive_less_mature: bool = False, + archive_less_mature: int = 
None, publish_action: str = None, ) -> Tuple[pystac.Item, str]: product_name = "esa_worldcover_" + map_version["year"] @@ -166,7 +166,7 @@ def esa_wc_to_dc( limit: int, update: bool, n_workers: int = 100, - archive_less_mature: bool = False, + archive_less_mature: int = None, publish_action: str = None, ) -> Tuple[int, int]: doc2ds = Doc2Dataset(dc.index) diff --git a/apps/dc_tools/odc/apps/dc_tools/stac_api_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/stac_api_to_dc.py index 34ec66b8d..35863f260 100644 --- a/apps/dc_tools/odc/apps/dc_tools/stac_api_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/stac_api_to_dc.py @@ -125,7 +125,7 @@ def process_item( allow_unsafe: bool, rewrite: Optional[Tuple[str, str]] = None, rename_product: Optional[str] = None, - archive_less_mature: bool = False, + archive_less_mature: int = None, publish_action: bool = False, ): meta, uri, stac = item_to_meta_uri(item, rewrite, rename_product) @@ -150,7 +150,7 @@ def stac_api_to_odc( allow_unsafe: bool = True, rewrite: Optional[Tuple[str, str]] = None, rename_product: Optional[str] = None, - archive_less_mature: bool = False, + archive_less_mature: int = None, publish_action: Optional[str] = None, ) -> Tuple[int, int, int]: doc2ds = Doc2Dataset(dc.index) diff --git a/apps/dc_tools/odc/apps/dc_tools/utils.py b/apps/dc_tools/odc/apps/dc_tools/utils.py index 05409d441..fa7f00bcb 100644 --- a/apps/dc_tools/odc/apps/dc_tools/utils.py +++ b/apps/dc_tools/odc/apps/dc_tools/utils.py @@ -122,6 +122,20 @@ class SkippedException(Exception): "fail if a matching dataset with higher or equal dataset-maturity." ), ) +archive_less_mature = click.option( + "--archive-less-mature", + is_flag=False, + flag_value=500, + default=None, + help=( + "Archive existing any datasets that match product, " + "time and region-code, but have lower dataset-maturity." + "Note: An error will be raised and the dataset add will " + "fail if a matching dataset with higher or equal dataset-maturity." 
+ "Can specify an amount of leniency for comparing timestamps, provided in milliseconds. " + "Default value is 500ms." + ), +) publish_action = click.option( "--publish-action", @@ -176,7 +190,7 @@ def index_update_dataset( update: bool = False, update_if_exists: bool = False, allow_unsafe: bool = False, - archive_less_mature: Optional[Union[bool, Iterable[str]]] = None, + archive_less_mature: Optional[int] = None, publish_action: Optional[str] = None, stac_doc: Optional[dict] = None, ) -> int: @@ -191,13 +205,12 @@ def index_update_dataset( :param update_if_exists: If true allow insert or update. :param allow_unsafe: Allow unsafe (arbitrary) dataset updates. :param archive_less_mature: Enforce dataset maturity. - * If None (the default) or False or an empty iterable, ignore dataset maturity. + * If None (the default) or False, ignore dataset maturity. * If True, enforce dataset maturity by looking for existing datasets with same product, region_code and time values. If a less mature match is found, it is archived and replaced with the new dataset being inserted. If a match of the same or greater maturity is found a SkippedException is raised. - * If an iterable of valid search field names is provided, it is used as the "grouping" fields for - identifying dataset maturity matches. - (i.e. `archive_less_mature=True` is the same as `archive_less_mature=['region_code', 'time']) + * If an integer is provided, it is used as the timedelta value for allowing a leniency when comparing timestamp + values, for datasets where there is a slight discrepancy. Default is 500ms. :param publish_action: SNS topic arn to publish action to. :param stac_doc: STAC document for publication to SNS topic. :return: Returns nothing. Raises an exception if anything goes wrong. 
@@ -221,8 +234,10 @@ def index_update_dataset( archive_stacs = [] added = False updated = False - if archive_less_mature and publish_action: - dupes = dc.index.datasets.find_less_mature(ds, 500) + + valid_delta = type(archive_less_mature) is int and archive_less_mature >= 0 + if valid_delta and publish_action: + dupes = dc.index.datasets.find_less_mature(ds, archive_less_mature) for dupe in dupes: archive_stacs.append(ds_to_stac(dupe)) From 2783a86cf3aae60a935bae9b0c45ac4cec52e59e Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Mon, 9 Oct 2023 04:47:01 +0000 Subject: [PATCH 2/6] fix pylint issues --- apps/dc_tools/odc/apps/dc_tools/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/dc_tools/odc/apps/dc_tools/utils.py b/apps/dc_tools/odc/apps/dc_tools/utils.py index fa7f00bcb..b19be49bd 100644 --- a/apps/dc_tools/odc/apps/dc_tools/utils.py +++ b/apps/dc_tools/odc/apps/dc_tools/utils.py @@ -3,7 +3,7 @@ import importlib_resources from datadog import statsd, initialize from odc.aws.queue import publish_to_topic -from typing import Iterable, Optional, Union +from typing import Optional from datacube import Datacube from datacube.index.hl import Doc2Dataset @@ -209,8 +209,8 @@ def index_update_dataset( * If True, enforce dataset maturity by looking for existing datasets with same product, region_code and time values. If a less mature match is found, it is archived and replaced with the new dataset being inserted. If a match of the same or greater maturity is found a SkippedException is raised. - * If an integer is provided, it is used as the timedelta value for allowing a leniency when comparing timestamp - values, for datasets where there is a slight discrepancy. Default is 500ms. + * If an integer is provided, it is used as the timedelta value for allowing a leniency when comparing + timestamp values, for datasets where there is a slight discrepancy. Default is 500ms. :param publish_action: SNS topic arn to publish action to. 
:param stac_doc: STAC document for publication to SNS topic. :return: Returns nothing. Raises an exception if anything goes wrong. @@ -235,7 +235,7 @@ def index_update_dataset( added = False updated = False - valid_delta = type(archive_less_mature) is int and archive_less_mature >= 0 + valid_delta = isinstance(archive_less_mature, int) and archive_less_mature >= 0 if valid_delta and publish_action: dupes = dc.index.datasets.find_less_mature(ds, archive_less_mature) for dupe in dupes: From b7c997170fc6958fb8eb580e614d255cf8d174fc Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Tue, 10 Oct 2023 01:23:07 +0000 Subject: [PATCH 3/6] ensure archive_less_mature is an int --- .../dc_tools/odc/apps/dc_tools/azure_to_dc.py | 4 +++- .../odc/apps/dc_tools/cop_dem_to_dc.py | 3 +++ .../odc/apps/dc_tools/esa_worldcover_to_dc.py | 3 +++ apps/dc_tools/odc/apps/dc_tools/fs_to_dc.py | 3 +++ apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py | 3 +++ apps/dc_tools/odc/apps/dc_tools/sqs_to_dc.py | 3 +++ .../odc/apps/dc_tools/stac_api_to_dc.py | 3 +++ apps/dc_tools/odc/apps/dc_tools/utils.py | 20 ++++--------------- 8 files changed, 25 insertions(+), 17 deletions(-) diff --git a/apps/dc_tools/odc/apps/dc_tools/azure_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/azure_to_dc.py index 5b220df93..44be4e09c 100644 --- a/apps/dc_tools/odc/apps/dc_tools/azure_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/azure_to_dc.py @@ -39,7 +39,7 @@ def dump_list_to_odc( update: Optional[bool] = False, update_if_exists: Optional[bool] = False, allow_unsafe: Optional[bool] = False, - archive_less_mature: Optional[bool] = False, + archive_less_mature: Optional[int] = None, publish_action: Optional[str] = None, ): ds_added = 0 @@ -118,6 +118,8 @@ def cli( container_name, credential, prefix, suffix, account_url=account_url ) + if isinstance(archive_less_mature, str): + archive_less_mature = int(archive_less_mature) # Consume generator and fetch YAML's added, failed = dump_list_to_odc( account_url, diff --git 
a/apps/dc_tools/odc/apps/dc_tools/cop_dem_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/cop_dem_to_dc.py index 0acd4b788..8737994eb 100644 --- a/apps/dc_tools/odc/apps/dc_tools/cop_dem_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/cop_dem_to_dc.py @@ -251,6 +251,9 @@ def cli( print(f"Indexing Copernicus DEM for {product} with bounding box of {bbox}") + if isinstance(archive_less_mature, str): + archive_less_mature = int(archive_less_mature) + added, failed, skipped = cop_dem_to_dc( dc, product, diff --git a/apps/dc_tools/odc/apps/dc_tools/esa_worldcover_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/esa_worldcover_to_dc.py index 8474bc410..29135b351 100644 --- a/apps/dc_tools/odc/apps/dc_tools/esa_worldcover_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/esa_worldcover_to_dc.py @@ -262,6 +262,9 @@ def cli( print(f"Indexing ESA WorldCover with bounding box of {bbox}") + if isinstance(archive_less_mature, str): + archive_less_mature = int(archive_less_mature) + added, failed = esa_wc_to_dc( dc, bbox, diff --git a/apps/dc_tools/odc/apps/dc_tools/fs_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/fs_to_dc.py index f72e77e2a..1b07d3cc5 100755 --- a/apps/dc_tools/odc/apps/dc_tools/fs_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/fs_to_dc.py @@ -58,6 +58,9 @@ def cli( added, failed = 0, 0 + if isinstance(archive_less_mature, str): + archive_less_mature = int(archive_less_mature) + for in_file in files_to_process: with in_file.open() as f: try: diff --git a/apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py index c128042de..0b346db6c 100755 --- a/apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py @@ -166,6 +166,9 @@ def cli( url.url for url in s3_find_glob(uri, skip_check=skip_check, s3=fetcher, **opts) ) + if isinstance(archive_less_mature, str): + archive_less_mature = int(archive_less_mature) + added, failed, skipped = dump_to_odc( fetcher(document_stream), dc, diff --git 
a/apps/dc_tools/odc/apps/dc_tools/sqs_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/sqs_to_dc.py index 6ddb321e1..2ddefe4bc 100644 --- a/apps/dc_tools/odc/apps/dc_tools/sqs_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/sqs_to_dc.py @@ -382,6 +382,9 @@ def cli( sqs = boto3.resource("sqs") queue = sqs.get_queue_by_name(QueueName=queue_name) + if isinstance(archive_less_mature, str): + archive_less_mature = int(archive_less_mature) + # Do the thing dc = Datacube() success, failed, skipped = queue_to_odc( diff --git a/apps/dc_tools/odc/apps/dc_tools/stac_api_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/stac_api_to_dc.py index 35863f260..767e248d6 100644 --- a/apps/dc_tools/odc/apps/dc_tools/stac_api_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/stac_api_to_dc.py @@ -297,6 +297,9 @@ def cli( "Rewrite assets argument needs to be two strings split by ','" ) + if isinstance(archive_less_mature, str): + archive_less_mature = int(archive_less_mature) + # Do the thing dc = Datacube() added, failed, skipped = stac_api_to_odc( diff --git a/apps/dc_tools/odc/apps/dc_tools/utils.py b/apps/dc_tools/odc/apps/dc_tools/utils.py index b19be49bd..d65625b91 100644 --- a/apps/dc_tools/odc/apps/dc_tools/utils.py +++ b/apps/dc_tools/odc/apps/dc_tools/utils.py @@ -111,17 +111,6 @@ class SkippedException(Exception): help="Needed when accessing requester pays public buckets.", ) -archive_less_mature = click.option( - "--archive-less-mature", - is_flag=True, - default=False, - help=( - "Archive existing any datasets that match product, " - "time and region-code, but have lower dataset-maturity." - "Note: An error will be raised and the dataset add will " - "fail if a matching dataset with higher or equal dataset-maturity." - ), -) archive_less_mature = click.option( "--archive-less-mature", is_flag=False, @@ -205,11 +194,11 @@ def index_update_dataset( :param update_if_exists: If true allow insert or update. :param allow_unsafe: Allow unsafe (arbitrary) dataset updates. 
:param archive_less_mature: Enforce dataset maturity. - * If None (the default) or False, ignore dataset maturity. - * If True, enforce dataset maturity by looking for existing datasets with same product, region_code and time + * If None (the default), ignore dataset maturity. + * If integer, enforce dataset maturity by looking for existing datasets with same product, region_code and time values. If a less mature match is found, it is archived and replaced with the new dataset being inserted. If a match of the same or greater maturity is found a SkippedException is raised. - * If an integer is provided, it is used as the timedelta value for allowing a leniency when comparing + The integer value is used as the timedelta value for allowing a leniency when comparing timestamp values, for datasets where there is a slight discrepancy. Default is 500ms. :param publish_action: SNS topic arn to publish action to. :param stac_doc: STAC document for publication to SNS topic. @@ -235,8 +224,7 @@ def index_update_dataset( added = False updated = False - valid_delta = isinstance(archive_less_mature, int) and archive_less_mature >= 0 - if valid_delta and publish_action: + if isinstance(archive_less_mature, int) and publish_action: dupes = dc.index.datasets.find_less_mature(ds, archive_less_mature) for dupe in dupes: archive_stacs.append(ds_to_stac(dupe)) From 380a15b36f1654bf820dc496bffc3973a642193a Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Tue, 10 Oct 2023 01:34:21 +0000 Subject: [PATCH 4/6] appease pre-commit --- apps/dc_tools/odc/apps/dc_tools/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/dc_tools/odc/apps/dc_tools/utils.py b/apps/dc_tools/odc/apps/dc_tools/utils.py index d65625b91..98b5c51e3 100644 --- a/apps/dc_tools/odc/apps/dc_tools/utils.py +++ b/apps/dc_tools/odc/apps/dc_tools/utils.py @@ -195,7 +195,7 @@ def index_update_dataset( :param allow_unsafe: Allow unsafe (arbitrary) dataset updates. 
:param archive_less_mature: Enforce dataset maturity. * If None (the default), ignore dataset maturity. - * If integer, enforce dataset maturity by looking for existing datasets with same product, region_code and time + * If int, enforce dataset maturity by looking for existing datasets with same product, region_code and time values. If a less mature match is found, it is archived and replaced with the new dataset being inserted. If a match of the same or greater maturity is found a SkippedException is raised. The integer value is used as the timedelta value for allowing a leniency when comparing From 8aee5d3921f8a966ddd0652ab5f62132cac7493f Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Tue, 10 Oct 2023 03:52:34 +0000 Subject: [PATCH 5/6] specify --archive-less-mature type as int --- apps/dc_tools/odc/apps/dc_tools/azure_to_dc.py | 2 -- apps/dc_tools/odc/apps/dc_tools/cop_dem_to_dc.py | 3 --- apps/dc_tools/odc/apps/dc_tools/esa_worldcover_to_dc.py | 3 --- apps/dc_tools/odc/apps/dc_tools/fs_to_dc.py | 3 --- apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py | 3 --- apps/dc_tools/odc/apps/dc_tools/sqs_to_dc.py | 3 --- apps/dc_tools/odc/apps/dc_tools/stac_api_to_dc.py | 3 --- apps/dc_tools/odc/apps/dc_tools/utils.py | 1 + 8 files changed, 1 insertion(+), 20 deletions(-) diff --git a/apps/dc_tools/odc/apps/dc_tools/azure_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/azure_to_dc.py index 44be4e09c..52ee4ec4f 100644 --- a/apps/dc_tools/odc/apps/dc_tools/azure_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/azure_to_dc.py @@ -118,8 +118,6 @@ def cli( container_name, credential, prefix, suffix, account_url=account_url ) - if isinstance(archive_less_mature, str): - archive_less_mature = int(archive_less_mature) # Consume generator and fetch YAML's added, failed = dump_list_to_odc( account_url, diff --git a/apps/dc_tools/odc/apps/dc_tools/cop_dem_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/cop_dem_to_dc.py index 8737994eb..0acd4b788 100644 --- 
a/apps/dc_tools/odc/apps/dc_tools/cop_dem_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/cop_dem_to_dc.py @@ -251,9 +251,6 @@ def cli( print(f"Indexing Copernicus DEM for {product} with bounding box of {bbox}") - if isinstance(archive_less_mature, str): - archive_less_mature = int(archive_less_mature) - added, failed, skipped = cop_dem_to_dc( dc, product, diff --git a/apps/dc_tools/odc/apps/dc_tools/esa_worldcover_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/esa_worldcover_to_dc.py index 29135b351..8474bc410 100644 --- a/apps/dc_tools/odc/apps/dc_tools/esa_worldcover_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/esa_worldcover_to_dc.py @@ -262,9 +262,6 @@ def cli( print(f"Indexing ESA WorldCover with bounding box of {bbox}") - if isinstance(archive_less_mature, str): - archive_less_mature = int(archive_less_mature) - added, failed = esa_wc_to_dc( dc, bbox, diff --git a/apps/dc_tools/odc/apps/dc_tools/fs_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/fs_to_dc.py index 1b07d3cc5..f72e77e2a 100755 --- a/apps/dc_tools/odc/apps/dc_tools/fs_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/fs_to_dc.py @@ -58,9 +58,6 @@ def cli( added, failed = 0, 0 - if isinstance(archive_less_mature, str): - archive_less_mature = int(archive_less_mature) - for in_file in files_to_process: with in_file.open() as f: try: diff --git a/apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py index 0b346db6c..c128042de 100755 --- a/apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py @@ -166,9 +166,6 @@ def cli( url.url for url in s3_find_glob(uri, skip_check=skip_check, s3=fetcher, **opts) ) - if isinstance(archive_less_mature, str): - archive_less_mature = int(archive_less_mature) - added, failed, skipped = dump_to_odc( fetcher(document_stream), dc, diff --git a/apps/dc_tools/odc/apps/dc_tools/sqs_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/sqs_to_dc.py index 2ddefe4bc..6ddb321e1 100644 --- 
a/apps/dc_tools/odc/apps/dc_tools/sqs_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/sqs_to_dc.py @@ -382,9 +382,6 @@ def cli( sqs = boto3.resource("sqs") queue = sqs.get_queue_by_name(QueueName=queue_name) - if isinstance(archive_less_mature, str): - archive_less_mature = int(archive_less_mature) - # Do the thing dc = Datacube() success, failed, skipped = queue_to_odc( diff --git a/apps/dc_tools/odc/apps/dc_tools/stac_api_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/stac_api_to_dc.py index 767e248d6..35863f260 100644 --- a/apps/dc_tools/odc/apps/dc_tools/stac_api_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/stac_api_to_dc.py @@ -297,9 +297,6 @@ def cli( "Rewrite assets argument needs to be two strings split by ','" ) - if isinstance(archive_less_mature, str): - archive_less_mature = int(archive_less_mature) - # Do the thing dc = Datacube() added, failed, skipped = stac_api_to_odc( diff --git a/apps/dc_tools/odc/apps/dc_tools/utils.py b/apps/dc_tools/odc/apps/dc_tools/utils.py index 98b5c51e3..60d78b47d 100644 --- a/apps/dc_tools/odc/apps/dc_tools/utils.py +++ b/apps/dc_tools/odc/apps/dc_tools/utils.py @@ -116,6 +116,7 @@ class SkippedException(Exception): is_flag=False, flag_value=500, default=None, + type=int, help=( "Archive existing any datasets that match product, " "time and region-code, but have lower dataset-maturity." 
From e8643c8752242fc46d560da2cda8a4fccaa90f1b Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Tue, 10 Oct 2023 05:24:28 +0000 Subject: [PATCH 6/6] one last test + update version number --- apps/dc_tools/odc/apps/dc_tools/_version.py | 2 +- apps/dc_tools/tests/test_fs_to_dc.py | 30 +++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/apps/dc_tools/odc/apps/dc_tools/_version.py b/apps/dc_tools/odc/apps/dc_tools/_version.py index 11ef09286..f3291e93b 100644 --- a/apps/dc_tools/odc/apps/dc_tools/_version.py +++ b/apps/dc_tools/odc/apps/dc_tools/_version.py @@ -1 +1 @@ -__version__ = "0.2.13" +__version__ = "0.2.14" diff --git a/apps/dc_tools/tests/test_fs_to_dc.py b/apps/dc_tools/tests/test_fs_to_dc.py index 2daa7a61e..49edadef9 100644 --- a/apps/dc_tools/tests/test_fs_to_dc.py +++ b/apps/dc_tools/tests/test_fs_to_dc.py @@ -51,6 +51,36 @@ def test_archive_less_mature(odc_db, test_data_dir, nrt_dsid, final_dsid): assert dc.index.datasets.get(nrt_dsid).archived_time is not None +def test_dont_archive_less_mature(odc_db, test_data_dir, nrt_dsid, final_dsid): + # no archiving should be done if --archive-less-mature is not set + dc = odc_db + runner = CliRunner() + + # Index NRT dataset + result = runner.invoke( + fs_to_dc_cli, + [ + test_data_dir, + "--glob=**/maturity-nrt.odc-metadata.yaml", + ], + ) + assert result.exit_code == 0 + assert dc.index.datasets.get(final_dsid) is None + assert dc.index.datasets.get(nrt_dsid).archived_time is None + + # Index Final dataset (should not auto-archive NRT) + result = runner.invoke( + fs_to_dc_cli, + [ + test_data_dir, + "--glob=**/maturity-final.odc-metadata.yaml", + ], + ) + assert result.exit_code == 0 + assert dc.index.datasets.get(final_dsid).archived_time is None + assert dc.index.datasets.get(nrt_dsid).archived_time is None + + def test_keep_more_mature(odc_db, test_data_dir, nrt_dsid, final_dsid): dc = odc_db runner = CliRunner()