From 4a1185d4b878754ae4c8a5a76e95430ca209323b Mon Sep 17 00:00:00 2001
From: "Peter A. Jonsson"
Date: Fri, 30 Aug 2024 15:28:33 +0200
Subject: [PATCH 01/11] Dockerfile: fix casing warning

This fixes the warning:

    FromAsCasing: 'as' and 'FROM' keywords' casing do not match

emitted by recent Docker releases.
---
 docker/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 81a81447f..8566682c6 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -5,7 +5,7 @@
 ## SPDX-License-Identifier: Apache-2.0
 ##
 # gdal:ubuntu-small no longer comes with netcdf support compiled into gdal
-FROM ghcr.io/osgeo/gdal:ubuntu-full-3.9.0 as builder
+FROM ghcr.io/osgeo/gdal:ubuntu-full-3.9.0 AS builder
 FROM ghcr.io/osgeo/gdal:ubuntu-full-3.9.0
 ARG V_PG=16
 ARG V_PGIS=16-postgis-3

From 9e1337bf07af488e31a0eb99eb3e1a6e1f98e256 Mon Sep 17 00:00:00 2001
From: "Peter A. Jonsson"
Date: Fri, 30 Aug 2024 15:30:36 +0200
Subject: [PATCH 02/11] Dockerfile: fix key value warning

This fixes the warning:

    LegacyKeyValueFormat: "ENV key=value" should be used instead of legacy
    "ENV key value" format

emitted by recent Docker releases.

And while changing this, bring all the ENV lines into a single layer,
which saves some space in the resulting image.
---
 docker/Dockerfile | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 8566682c6..84e9b82e7 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -34,15 +34,14 @@ RUN apt-get update -y \
 
 # Build constrained python environment
 RUN virtualenv /env
-ENV PYENV /env
-ENV GDAL_CONFIG /usr/bin/gdal-config
+# Set the locale, this is required for some of the Python packages
+ENV PYENV=/env \
+    GDAL_CONFIG=/usr/bin/gdal-config \
+    LC_ALL=C.UTF-8
 
 # Needed to build cf-units wheels.
 ARG UDUNITS2_XML_PATH=/usr/share/xml/udunits/udunits2-common.xml
 
-# Set the locale, this is required for some of the Python packages
-ENV LC_ALL C.UTF-8
-
 COPY docker/constraints.in /conf/requirements.txt
 COPY docker/constraints.txt docker/nobinary.txt /conf/

From df6a7692b2d826bb5e872680e5f9ff43ef4fba17 Mon Sep 17 00:00:00 2001
From: "Peter A. Jonsson"
Date: Fri, 30 Aug 2024 15:39:20 +0200
Subject: [PATCH 03/11] Configure Dependabot for Docker image

This will cause Dependabot to make pull requests for the Dockerfile
when there are new releases of GDAL.
---
 .github/dependabot.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 90e05c40d..e115726a7 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -9,3 +9,7 @@ updates:
     directory: "/" # Location of package manifests
     schedule:
       interval: "weekly"
+  - package-ecosystem: docker
+    directory: "/docker"
+    schedule:
+      interval: "daily"

From 79bd34004be3927184312594fbd1cd8b6f6cd497 Mon Sep 17 00:00:00 2001
From: "Peter A. Jonsson"
Date: Fri, 30 Aug 2024 15:49:30 +0200
Subject: [PATCH 04/11] CI: set permissions for docpreview

This is part of the documentation for the action:
https://github.com/readthedocs/actions/tree/v1/preview
---
 .github/workflows/doc-qa.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/doc-qa.yaml b/.github/workflows/doc-qa.yaml
index e850ea93f..b65e55227 100644
--- a/.github/workflows/doc-qa.yaml
+++ b/.github/workflows/doc-qa.yaml
@@ -8,6 +8,9 @@ on:
   # Allows you to run this workflow manually from the Actions tab
   workflow_dispatch:
 
+permissions:
+  pull-requests: write
+
 jobs:
   pyspell:
     runs-on: ubuntu-latest

From cf7b610e32b176ff25d87b048c20c5857706179b Mon Sep 17 00:00:00 2001
From: "Peter A. Jonsson"
Date: Fri, 30 Aug 2024 15:54:38 +0200
Subject: [PATCH 05/11] CI: only doc preview pull requests

The documentation at
https://github.com/readthedocs/actions/tree/v1/preview uses this.

According to
https://stackoverflow.com/questions/74957218/what-is-the-difference-between-pull-request-and-pull-request-target-event-in-git
there is something with permissions of tokens that differ between
pull_request and pull_request_target, so switch to what the action
documentation suggests.
---
 .github/workflows/doc-qa.yaml | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/doc-qa.yaml b/.github/workflows/doc-qa.yaml
index b65e55227..233c7a988 100644
--- a/.github/workflows/doc-qa.yaml
+++ b/.github/workflows/doc-qa.yaml
@@ -1,12 +1,9 @@
 name: Doc QA
 on:
   # Triggers the workflow on pull request events for the main branch
-  pull_request:
-    branches:
-      - 'develop'
-
-  # Allows you to run this workflow manually from the Actions tab
-  workflow_dispatch:
+  pull_request_target:
+    types:
+      - opened
 
 permissions:
   pull-requests: write

From 3d557f1e084afe06a6cd2f84ea99e63e36837b1c Mon Sep 17 00:00:00 2001
From: Caitlin Adams
Date: Wed, 4 Sep 2024 10:28:34 +1000
Subject: [PATCH 06/11] Update docs README to give suitable instructions for local docs build (#1631)

---
 docs/README.rst       | 52 +++++++++++++++++++++++++++++++++++++------
 docs/requirements.txt |  2 +-
 2 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/docs/README.rst b/docs/README.rst
index 31a0a05bb..7e4725eb8 100644
--- a/docs/README.rst
+++ b/docs/README.rst
@@ -8,22 +8,60 @@ Developing Locally
 Requires a Unix like system that includes ``make``.
 
-#. Install NodeJS + NPM
-#. Install Browser Sync
 
 .. code-block:: bash
 
-    npm install -g browser-sync
 
-#. Install Python dependencies
 
 .. code-block:: bash
 
-    pip install -r requirements.txt
-    pip install git+https://github.com/carrotandcompany/sphinx-autobuild.git@feature_event_delay
 
-#. Start the auto-building development server.
+#. Clone the datacube-core repository. If you don't have permissions to push to the datacube-core library, you will need to fork the repo and clone your fork.
 
 .. code-block:: bash
 
     git clone https://github.com/opendatacube/datacube-core.git
 
+#. Check out a new branch for the documentation feature you're working on
 
 .. code-block:: bash
 
     git switch -c docs-
 
+#. Change directory to the docs folder
 
 .. code-block:: bash
 
     cd docs
 
+#. Create a conda environment for python 3.11, with conda-forge as the channel
 
 .. code-block:: bash
 
     conda create --name datacubecoredocs -c conda-forge python=3.11
 
+#. Activate the conda environment
 
 .. code-block:: bash
 
     conda activate datacubecoredocs
 
+#. Install pandoc
 
 .. code-block:: bash
 
     conda install pandoc
 
+#. Install requirements with pip
 
 .. code-block:: bash
 
     pip install -r requirements.txt
 
+#. Run the autobuild.
 
 .. code-block:: bash
 
     sphinx-autobuild . _build/html
 
+#. Open a browser and navigate to the URL provided by the autobuild
 
+#. Make changes to the docs. The terminal with the autobuild will continue to update the docs view in the browser.
 
+#. When finished, quit the autobuild process using ``ctrl-c`` in the terminal.
 
+#. Stage and commit your changes.
 
+#. When ready for review, push your changes and create a pull request.
diff --git a/docs/requirements.txt b/docs/requirements.txt
index ce8a5dcc8..587deab6f 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -177,7 +177,7 @@ platformdirs==3.0.0
     # via jupyter-core
 psutil==5.9.4
     # via distributed
-psycopg2==2.9.5
+#psycopg2==2.9.5
     # via datacube (setup.py)
 pydata-sphinx-theme==0.9.0
     # via datacube (setup.py)

From c311453963125e6f21bda9000611a822a444154d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 20 Sep 2024 15:34:14 +1000
Subject: [PATCH 07/11] Bump osgeo/gdal from ubuntu-full-3.9.0 to ubuntu-full-3.9.2 in /docker (#1629)

Bumps [osgeo/gdal](https://github.com/OSGeo/gdal) from ubuntu-full-3.9.0 to ubuntu-full-3.9.2.
- [Release notes](https://github.com/OSGeo/gdal/releases)
- [Changelog](https://github.com/OSGeo/gdal/blob/master/NEWS.md)
- [Commits](https://github.com/OSGeo/gdal/compare/v3.9.0...v3.9.2)

---
updated-dependencies:
- dependency-name: osgeo/gdal
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 docker/Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 84e9b82e7..bcbaa4075 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -5,8 +5,8 @@
 ## SPDX-License-Identifier: Apache-2.0
 ##
 # gdal:ubuntu-small no longer comes with netcdf support compiled into gdal
-FROM ghcr.io/osgeo/gdal:ubuntu-full-3.9.0 AS builder
-FROM ghcr.io/osgeo/gdal:ubuntu-full-3.9.0
+FROM ghcr.io/osgeo/gdal:ubuntu-full-3.9.2 AS builder
+FROM ghcr.io/osgeo/gdal:ubuntu-full-3.9.2
 ARG V_PG=16
 ARG V_PGIS=16-postgis-3

From 5ce3e44ad583518fc5d10a6988791f69f9f5c39d Mon Sep 17 00:00:00 2001
From: Paul Haesler
Date: Fri, 20 Sep 2024 16:03:05 +1000
Subject: [PATCH 08/11] Cherry picks from develop: #1624, #1625, #1626, #1629, #1630, #1631

---
 docs/about/whats_new.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/about/whats_new.rst b/docs/about/whats_new.rst
index 449149799..eb8635ff7 100644
--- a/docs/about/whats_new.rst
+++ b/docs/about/whats_new.rst
@@ -19,6 +19,7 @@ v1.9.next
 - Drop valid-area check and anti-meridian fix 3857 extents (:pull:1635)
 - Remove problematic "common_timestamp" postgresql function from postgis driver. Some internal API
   changes required to accommodate and preserve all previous index-driver level behaviour. (:pull:`1623`)
+- Cherry picks from 1.8 (#1624-#1626, #1629-#1631) (:pull:`1635`)
 
 v1.9.0-rc9 (3rd July 2024)
 ==========================

From 75fbbbabe0aa6eb07047946b40da7c6fb89f4f29 Mon Sep 17 00:00:00 2001
From: Paul Haesler
Date: Fri, 20 Sep 2024 16:46:39 +1000
Subject: [PATCH 09/11] Oops - fix typo from last PR.

---
 datacube/drivers/postgis/_api.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/datacube/drivers/postgis/_api.py b/datacube/drivers/postgis/_api.py
index 59d07cc19..819552864 100644
--- a/datacube/drivers/postgis/_api.py
+++ b/datacube/drivers/postgis/_api.py
@@ -1525,19 +1525,19 @@ def temporal_extent_by_prod(self, product_id: int) -> tuple[datetime.datetime, d
         query = self.temporal_extent_full().where(Dataset.product_ref == product_id)
         res = self._connection.execute(query)
         for tmin, tmax in res:
-            return (self.time_min.normalise_value(tmin), self.time_max.normalise_value(tmax))
+            return (time_min.normalise_value(tmin), time_max.normalise_value(tmax))
         raise RuntimeError("Product has no datasets and therefore no temporal extent")
 
     def temporal_extent_by_ids(self, ids: Iterable[DSID]) -> tuple[datetime.datetime, datetime.datetime]:
         query = self.temporal_extent_full().where(Dataset.id.in_(ids))
         res = self._connection.execute(query)
         for tmin, tmax in res:
-            return (self.time_min.normalise_value(tmin), self.time_max.normalise_value(tmax))
+            return (time_min.normalise_value(tmin), time_max.normalise_value(tmax))
         raise ValueError("no dataset ids provided")
 
     def temporal_extent_full(self) -> Select:
         # Hardcode eo3 standard time locations - do not use this approach in a legacy index driver.
         return select(
-            func.min(self.time_min.alchemy_expression), func.max(self.time_max.alchemy_expression)
+            func.min(time_min.alchemy_expression), func.max(time_max.alchemy_expression)
         )
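For context on the helpers touched by PATCH 09: temporal_extent_full() expresses a temporal extent as a single min/max aggregate over the eo3 time columns, and both callers unpack the one row that query returns. Below is a minimal, self-contained sketch of that aggregate pattern in SQLAlchemy, using a made-up "dataset" table on in-memory SQLite rather than the real datacube-core PostGIS schema.

    # Sketch only: hypothetical table, not the datacube-core schema.
    import datetime

    from sqlalchemy import (Column, DateTime, Integer, MetaData, Table,
                            create_engine, func, insert, select)

    metadata = MetaData()
    dataset = Table(
        "dataset", metadata,
        Column("id", Integer, primary_key=True),
        Column("time_min", DateTime),
        Column("time_max", DateTime),
    )

    engine = create_engine("sqlite://")
    metadata.create_all(engine)

    with engine.begin() as conn:
        conn.execute(insert(dataset), [
            {"time_min": datetime.datetime(2020, 1, 1), "time_max": datetime.datetime(2020, 1, 2)},
            {"time_min": datetime.datetime(2021, 6, 1), "time_max": datetime.datetime(2021, 6, 3)},
        ])
        # One aggregate query yields the whole extent: earliest start, latest end.
        tmin, tmax = conn.execute(
            select(func.min(dataset.c.time_min), func.max(dataset.c.time_max))
        ).one()
        print(tmin, tmax)  # 2020-01-01 00:00:00 2021-06-03 00:00:00

In the real driver the module-level time_min/time_max column descriptors play the role of the literal columns here, which is why the fix simply drops the stray "self." prefixes.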
From d432d1f7456d8c9b5b14bf3f388b67b457323971 Mon Sep 17 00:00:00 2001
From: Paul Haesler
Date: Fri, 20 Sep 2024 16:51:39 +1000
Subject: [PATCH 10/11] update PR number in whats_new

---
 docs/about/whats_new.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/about/whats_new.rst b/docs/about/whats_new.rst
index eb8635ff7..5200ce40f 100644
--- a/docs/about/whats_new.rst
+++ b/docs/about/whats_new.rst
@@ -19,7 +19,7 @@ v1.9.next
 - Drop valid-area check and anti-meridian fix 3857 extents (:pull:1635)
 - Remove problematic "common_timestamp" postgresql function from postgis driver. Some internal API
   changes required to accommodate and preserve all previous index-driver level behaviour. (:pull:`1623`)
-- Cherry picks from 1.8 (#1624-#1626, #1629-#1631) (:pull:`1635`)
+- Cherry picks from 1.8 (#1624-#1626, #1629-#1631) (:pull:`1637`)
 
 v1.9.0-rc9 (3rd July 2024)
 ==========================

From f8154efdd956b4f042a773c80c92113bc0d15490 Mon Sep 17 00:00:00 2001
From: Ariana-B <40238244+ariana-b@users.noreply.github.com>
Date: Wed, 4 Sep 2024 10:19:33 +1000
Subject: [PATCH 11/11] Don't error in dataset add if `product.id` is None (#1630)

* don't assume product.id exists when adding a dataset

* don't assume assume product.id exists when adding dataset

* update whats_new

---------

Co-authored-by: Ariana Barzinpour
---
 datacube/index/postgis/_datasets.py        | 20 +++++++++++++-------
 datacube/index/postgres/_datasets.py       |  7 ++++++-
 docs/about/whats_new.rst                   |  1 +
 integration_tests/index/test_index_data.py | 11 ++++++++++-
 tests/index/test_api_index_dataset.py      |  3 +++
 5 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/datacube/index/postgis/_datasets.py b/datacube/index/postgis/_datasets.py
index d6105bc5d..6744fee13 100755
--- a/datacube/index/postgis/_datasets.py
+++ b/datacube/index/postgis/_datasets.py
@@ -161,13 +161,19 @@ def add(self, dataset: Dataset,
             return dataset
         with self._db_connection(transaction=True) as transaction:
             # 1a. insert (if not already exists)
-            transaction.insert_dataset(dataset.metadata_doc_without_lineage(), dataset.id, dataset.product.id)
-            # 1b. Prepare spatial index extents
-            transaction.update_spindex(dsids=[dataset.id])
-            transaction.update_search_index(dsids=[dataset.id])
-            # 1c. Store locations
-            if dataset.uri is not None:
-                self._ensure_new_locations(dataset, transaction=transaction)
+            product_id = dataset.product.id
+            if product_id is None:
+                # don't assume the product has an id value since it's optional
+                # but we should error if the product doesn't exist in the db
+                product_id = self.products.get_by_name_unsafe(dataset.product.name).id
+            is_new = transaction.insert_dataset(dataset.metadata_doc_without_lineage(), dataset.id, product_id)
+            if is_new:
+                # 1b. Prepare spatial index extents
+                transaction.update_spindex(dsids=[dataset.id])
+                transaction.update_search_index(dsids=[dataset.id])
+                # 1c. Store locations
+                if dataset.uris is not None:
+                    self._ensure_new_locations(dataset, transaction=transaction)
             if archive_less_mature is not None:
                 self.archive_less_mature(dataset, archive_less_mature)
             if dataset.source_tree is not None:
diff --git a/datacube/index/postgres/_datasets.py b/datacube/index/postgres/_datasets.py
index adbd8e4fb..68f919f4f 100755
--- a/datacube/index/postgres/_datasets.py
+++ b/datacube/index/postgres/_datasets.py
@@ -159,7 +159,12 @@ def process_bunch(dss, main_ds, transaction):
 
             # First insert all new datasets
             for ds in dss:
-                is_new = transaction.insert_dataset(ds.metadata_doc_without_lineage(), ds.id, ds.product.id)
+                product_id = ds.product.id
+                if product_id is None:
+                    # don't assume the product has an id value since it's optional
+                    # but we should error if the product doesn't exist in the db
+                    product_id = self.products.get_by_name_unsafe(ds.product.name).id
+                is_new = transaction.insert_dataset(ds.metadata_doc_without_lineage(), ds.id, product_id)
                 sources = ds.sources
                 if is_new and sources is not None:
                     edges.extend((name, ds.id, src.id)
diff --git a/docs/about/whats_new.rst b/docs/about/whats_new.rst
index 5200ce40f..e625c0e6a 100644
--- a/docs/about/whats_new.rst
+++ b/docs/about/whats_new.rst
@@ -138,6 +138,7 @@ v1.9.0-rc1 (27th March 2024)
 
 v1.8.next
 =========
+- Don't error when adding a dataset whose product doesn't have an id value (:pull:`1630`)
 
 v1.8.19 (2nd July 2024)
 =======================
diff --git a/integration_tests/index/test_index_data.py b/integration_tests/index/test_index_data.py
index f8a7d8260..e91eeccf2 100755
--- a/integration_tests/index/test_index_data.py
+++ b/integration_tests/index/test_index_data.py
@@ -18,7 +18,8 @@
 from datacube.index.exceptions import MissingRecordError
 from datacube.index import Index
-from datacube.model import Dataset, MetadataType
+from datacube.model import Dataset, Product, MetadataType
+from datacube.index.eo3 import prep_eo3
 
 
 _telemetry_uuid = UUID('4ec8fe97-e8b9-11e4-87ff-1040f381a756')
 _telemetry_dataset = {
@@ -258,6 +259,14 @@ def test_get_dataset(index: Index, ls8_eo3_dataset: Dataset) -> None:
                                              'f226a278-e422-11e6-b501-185e0f80a5c1']) == []
 
 
+def test_add_dataset_no_product_id(index: Index, extended_eo3_metadata_type, ls8_eo3_product, eo3_ls8_dataset_doc):
+    product_no_id = Product(extended_eo3_metadata_type, ls8_eo3_product.definition)
+    assert product_no_id.id is None
+    dataset_doc, _ = eo3_ls8_dataset_doc
+    dataset = Dataset(product_no_id, prep_eo3(dataset_doc))
+    assert index.datasets.add(dataset, with_lineage=False)
+
+
 def test_transactions_api_ctx_mgr(index,
                                   extended_eo3_metadata_type_doc,
                                   ls8_eo3_product,
diff --git a/tests/index/test_api_index_dataset.py b/tests/index/test_api_index_dataset.py
index a43899d6b..8eeeaa95c 100644
--- a/tests/index/test_api_index_dataset.py
+++ b/tests/index/test_api_index_dataset.py
@@ -208,6 +208,9 @@ def get(self, *args, **kwargs):
     def get_by_name(self, *args, **kwargs):
         return self.type
 
+    def get_by_name_unsafe(self, *args, **kwargs):
+        return self.type
+
     @contextmanager
     def _db_connection(self, transaction=False):
         yield MockDb()
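The substance of PATCH 11 is a small fallback: when the in-memory Product attached to a dataset carries no id, the index resolves the id by product name (and errors if the product was never indexed) before calling insert_dataset(). Below is a stripped-down sketch of that fallback, using illustrative stand-in classes rather than the real datacube-core index API.

    # Illustrative stand-ins only -- not the actual datacube-core classes.
    from dataclasses import dataclass
    from typing import Dict, Optional


    @dataclass
    class Product:
        name: str
        id: Optional[int] = None


    class ProductResource:
        """Minimal lookup table standing in for index.products."""

        def __init__(self, by_name: Dict[str, Product]):
            self._by_name = by_name

        def get_by_name_unsafe(self, name: str) -> Product:
            # Raises KeyError if the product was never indexed, mirroring the
            # "error if the product doesn't exist in the db" behaviour.
            return self._by_name[name]


    def resolve_product_id(product: Product, products: ProductResource) -> int:
        # Don't assume the in-memory Product carries an id; it may have been
        # built straight from a definition document.
        if product.id is not None:
            return product.id
        return products.get_by_name_unsafe(product.name).id


    products = ProductResource({"ga_ls8c_ard_3": Product("ga_ls8c_ard_3", id=7)})
    print(resolve_product_id(Product("ga_ls8c_ard_3"), products))  # -> 7

The mock change in tests/index/test_api_index_dataset.py mirrors this: the fake product resource only needs a get_by_name_unsafe() method for the fallback path to be exercised.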