Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update archive_less_mature option to expect int value #579

Merged
merged 8 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/dc_tools/odc/apps/dc_tools/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.2.13"
__version__ = "0.2.14"
4 changes: 2 additions & 2 deletions apps/dc_tools/odc/apps/dc_tools/azure_to_dc.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def dump_list_to_odc(
update: Optional[bool] = False,
update_if_exists: Optional[bool] = False,
allow_unsafe: Optional[bool] = False,
archive_less_mature: Optional[bool] = False,
archive_less_mature: Optional[int] = None,
publish_action: Optional[str] = None,
):
ds_added = 0
Expand Down Expand Up @@ -102,7 +102,7 @@ def cli(
allow_unsafe: bool,
stac: bool,
statsd_setting: str,
archive_less_mature: bool,
archive_less_mature: int,
publish_action: str,
account_url: str,
container_name: str,
Expand Down
2 changes: 1 addition & 1 deletion apps/dc_tools/odc/apps/dc_tools/cop_dem_to_dc.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def process_uri_tile(
dc: Datacube,
doc2ds: Doc2Dataset,
update_if_exists: bool = True,
archive_less_mature: bool = False,
archive_less_mature: int = None,
publish_action: str = None,
) -> Tuple[pystac.Item, str]:
product_name = f"dem_{product}"
Expand Down
4 changes: 2 additions & 2 deletions apps/dc_tools/odc/apps/dc_tools/esa_worldcover_to_dc.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def process_uri_tile(
dc: Datacube,
doc2ds: Doc2Dataset,
update_if_exists: bool = True,
archive_less_mature: bool = False,
archive_less_mature: int = None,
publish_action: str = None,
) -> Tuple[pystac.Item, str]:
product_name = "esa_worldcover_" + map_version["year"]
Expand Down Expand Up @@ -166,7 +166,7 @@ def esa_wc_to_dc(
limit: int,
update: bool,
n_workers: int = 100,
archive_less_mature: bool = False,
archive_less_mature: int = None,
publish_action: str = None,
) -> Tuple[int, int]:
doc2ds = Doc2Dataset(dc.index)
Expand Down
4 changes: 2 additions & 2 deletions apps/dc_tools/odc/apps/dc_tools/stac_api_to_dc.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def process_item(
allow_unsafe: bool,
rewrite: Optional[Tuple[str, str]] = None,
rename_product: Optional[str] = None,
archive_less_mature: bool = False,
archive_less_mature: int = None,
publish_action: bool = False,
):
meta, uri, stac = item_to_meta_uri(item, rewrite, rename_product)
Expand All @@ -150,7 +150,7 @@ def stac_api_to_odc(
allow_unsafe: bool = True,
rewrite: Optional[Tuple[str, str]] = None,
rename_product: Optional[str] = None,
archive_less_mature: bool = False,
archive_less_mature: int = None,
publish_action: Optional[str] = None,
) -> Tuple[int, int, int]:
doc2ds = Doc2Dataset(dc.index)
Expand Down
26 changes: 15 additions & 11 deletions apps/dc_tools/odc/apps/dc_tools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import importlib_resources
from datadog import statsd, initialize
from odc.aws.queue import publish_to_topic
from typing import Iterable, Optional, Union
from typing import Optional

from datacube import Datacube
from datacube.index.hl import Doc2Dataset
Expand Down Expand Up @@ -113,13 +113,17 @@ class SkippedException(Exception):

archive_less_mature = click.option(
"--archive-less-mature",
is_flag=True,
default=False,
is_flag=False,
flag_value=500,
default=None,
type=int,
help=(
"Archive any existing datasets that match product, "
"time and region-code, but have lower dataset-maturity. "
"Note: An error will be raised and the dataset add will "
"fail if a matching dataset with higher or equal dataset-maturity exists. "
"Can specify an amount of leniency for comparing timestamps, provided in milliseconds. "
"Default value is 500ms."
),
)

Expand Down Expand Up @@ -176,7 +180,7 @@ def index_update_dataset(
update: bool = False,
update_if_exists: bool = False,
allow_unsafe: bool = False,
archive_less_mature: Optional[Union[bool, Iterable[str]]] = None,
archive_less_mature: Optional[int] = None,
publish_action: Optional[str] = None,
stac_doc: Optional[dict] = None,
) -> int:
Expand All @@ -191,13 +195,12 @@ def index_update_dataset(
:param update_if_exists: If true allow insert or update.
:param allow_unsafe: Allow unsafe (arbitrary) dataset updates.
:param archive_less_mature: Enforce dataset maturity.
* If None (the default) or False or an empty iterable, ignore dataset maturity.
* If True, enforce dataset maturity by looking for existing datasets with same product, region_code and time
* If None (the default), ignore dataset maturity.
* If int, enforce dataset maturity by looking for existing datasets with same product, region_code and time
values. If a less mature match is found, it is archived and replaced with the new dataset being inserted.
If a match of the same or greater maturity is found a SkippedException is raised.
* If an iterable of valid search field names is provided, it is used as the "grouping" fields for
identifying dataset maturity matches.
(i.e. `archive_less_mature=True` is the same as `archive_less_mature=['region_code', 'time'])
The integer value is used as the timedelta value for allowing leniency when comparing
timestamp values, for datasets where there is a slight discrepancy. Default is 500ms.
:param publish_action: SNS topic arn to publish action to.
:param stac_doc: STAC document for publication to SNS topic.
:return: Returns nothing. Raises an exception if anything goes wrong.
Expand All @@ -221,8 +224,9 @@ def index_update_dataset(
archive_stacs = []
added = False
updated = False
if archive_less_mature and publish_action:
dupes = dc.index.datasets.find_less_mature(ds, 500)

if isinstance(archive_less_mature, int) and publish_action:
dupes = dc.index.datasets.find_less_mature(ds, archive_less_mature)
for dupe in dupes:
archive_stacs.append(ds_to_stac(dupe))

Expand Down
30 changes: 30 additions & 0 deletions apps/dc_tools/tests/test_fs_to_dc.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,36 @@ def test_archive_less_mature(odc_db, test_data_dir, nrt_dsid, final_dsid):
assert dc.index.datasets.get(nrt_dsid).archived_time is not None


def test_dont_archive_less_mature(odc_db, test_data_dir, nrt_dsid, final_dsid):
    """Verify no auto-archiving happens when --archive-less-mature is not set.

    Indexes an NRT (less mature) dataset and then a Final (more mature)
    dataset via the fs-to-dc CLI *without* the --archive-less-mature flag,
    and asserts that neither dataset is archived at any point.
    """
    # no archiving should be done if --archive-less-mature is not set
    dc = odc_db
    runner = CliRunner()

    # Index NRT dataset
    result = runner.invoke(
        fs_to_dc_cli,
        [
            test_data_dir,
            "--glob=**/maturity-nrt.odc-metadata.yaml",
        ],
    )
    assert result.exit_code == 0
    # Only the NRT dataset exists so far, and it is not archived.
    assert dc.index.datasets.get(final_dsid) is None
    assert dc.index.datasets.get(nrt_dsid).archived_time is None

    # Index Final dataset — without the flag, the NRT dataset must NOT be
    # auto-archived (contrast with test_archive_less_mature above).
    result = runner.invoke(
        fs_to_dc_cli,
        [
            test_data_dir,
            "--glob=**/maturity-final.odc-metadata.yaml",
        ],
    )
    assert result.exit_code == 0
    # Both datasets remain active (archived_time is None for both).
    assert dc.index.datasets.get(final_dsid).archived_time is None
    assert dc.index.datasets.get(nrt_dsid).archived_time is None


def test_keep_more_mature(odc_db, test_data_dir, nrt_dsid, final_dsid):
dc = odc_db
runner = CliRunner()
Expand Down
Loading