From 856ca412cdc125d603c8a4068c7b79a6f4f13a4d Mon Sep 17 00:00:00 2001 From: Victor Verhaert <33786515+VictorVerhaert@users.noreply.github.com> Date: Wed, 24 Apr 2024 16:40:02 +0200 Subject: [PATCH] fixed env files --- conda-environment.yaml | 4 ++-- requirements/requirements.txt | 1 + stacbuilder/terracatalog.py | 8 +++++--- tests/stacapi/test_endpoints.py | 4 ---- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/conda-environment.yaml b/conda-environment.yaml index ff9a369..79b635b 100644 --- a/conda-environment.yaml +++ b/conda-environment.yaml @@ -6,6 +6,7 @@ dependencies: - click=8.1.* - geopandas=0.14.* - openeo=0.26.* + - psutil - pyarrow=14.0.* - pydantic=2.5.* - pyproj=3.6.* @@ -17,12 +18,11 @@ dependencies: - shapely=2.0.* - stac-validator=3.3.* - stactools=0.5.* - - psutil - ipykernel - jupyter - pytest - - pip - yarl=1.9.* + - pip - pip: - --extra-index-url https://artifactory.vgt.vito.be/artifactory/api/pypi/python-packages/simple - terracatalogueclient==0.1.14 diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 37593fd..b7dffb7 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -4,6 +4,7 @@ click==8.1.* dynaconf==3.2.* geopandas==0.14.* openeo==0.26.* +psutil pyarrow==14.0.* pydantic==2.5.* pyproj==3.6.* diff --git a/stacbuilder/terracatalog.py b/stacbuilder/terracatalog.py index de0db83..f4b64c3 100644 --- a/stacbuilder/terracatalog.py +++ b/stacbuilder/terracatalog.py @@ -459,6 +459,7 @@ def get_products_as_dataframe(self) -> gpd.GeoDataFrame: # HACK parameters to split up calculation into smaller chunks slice_length = 100 # limits the active threads to prevent OOM errors + limit_chunks = False min_chunk, max_chunk = 0, 1000 # limits the number of chunks we process catalogue = self.get_tcc_catalogue() @@ -481,7 +482,7 @@ def get_products_as_dataframe(self) -> gpd.GeoDataFrame: if limit_reached: break - if not (min_chunk <= query_slots_iterator < max_chunk): + if limit_chunks
and (not (min_chunk <= query_slots_iterator < max_chunk)): # This is a temporary measure to prevent OOM errors. # We should find a better way to limit the number of products we process. self._log_progress_message( @@ -509,11 +510,12 @@ def get_products_as_dataframe(self) -> gpd.GeoDataFrame: ) for future in concurrent.futures.as_completed(futures): - new_products = future.result() + future_result = future.result() + new_products = [p for p in future_result if p.id not in product_ids] + if not new_products: # Avoid doing unnecessary work, might add empty dataframes to the total dataframe. continue - new_products = [p for p in new_products if p.id not in product_ids] self._log_progress_message(f"Number of new products {len(new_products)}", level=logging.DEBUG) product_ids.update([p.id for p in new_products]) self._log_progress_message(f"Number of unique products {len(product_ids)}", level=logging.DEBUG) diff --git a/tests/stacapi/test_endpoints.py b/tests/stacapi/test_endpoints.py index 78b9c0f..3770bea 100644 --- a/tests/stacapi/test_endpoints.py +++ b/tests/stacapi/test_endpoints.py @@ -164,10 +164,6 @@ def test_get(self, requests_mock, empty_collection: Collection, collection_endpt assert empty_collection.to_dict() == actual_collection.to_dict() assert m.called - @pytest.mark.xfail(reason="Test not implemented yet") - def test_get_all(self, requests_mock, empty_collection: Collection, collection_endpt: CollectionsEndpoint): - assert False, "Test not implemented yet" - def test_create(self, requests_mock, empty_collection: Collection, collection_endpt: CollectionsEndpoint): m = requests_mock.post(str(self.BASE_URL / "collections"), json=empty_collection.to_dict(), status_code=201) response_json = collection_endpt.create(empty_collection)