From 856ca412cdc125d603c8a4068c7b79a6f4f13a4d Mon Sep 17 00:00:00 2001
From: Victor Verhaert <33786515+VictorVerhaert@users.noreply.github.com>
Date: Wed, 24 Apr 2024 16:40:02 +0200
Subject: [PATCH] fixed env files

---
 conda-environment.yaml          | 4 ++--
 requirements/requirements.txt   | 1 +
 stacbuilder/terracatalog.py     | 8 +++++---
 tests/stacapi/test_endpoints.py | 4 ----
 4 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/conda-environment.yaml b/conda-environment.yaml
index ff9a369..79b635b 100644
--- a/conda-environment.yaml
+++ b/conda-environment.yaml
@@ -6,6 +6,7 @@ dependencies:
   - click=8.1.*
   - geopandas=0.14.*
   - openeo=0.26.*
+  - psutil
   - pyarrow=14.0.*
   - pydantic=2.5.*
   - pyproj=3.6.*
@@ -17,12 +18,11 @@ dependencies:
   - shapely=2.0.*
   - stac-validator=3.3.*
   - stactools=0.5.*
-  - psutil
   - ipykernel
   - jupyter
   - pytest
-  - pip
   - yarl=1.9.*
+  - pip
   - pip:
     - --extra-index-url https://artifactory.vgt.vito.be/artifactory/api/pypi/python-packages/simple
     - terracatalogueclient==0.1.14
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 37593fd..b7dffb7 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -4,6 +4,7 @@ click==8.1.*
 dynaconf==3.2.*
 geopandas==0.14.*
 openeo==0.26.*
+psutil
 pyarrow==14.0.*
 pydantic==2.5.*
 pyproj==3.6.*
diff --git a/stacbuilder/terracatalog.py b/stacbuilder/terracatalog.py
index de0db83..f4b64c3 100644
--- a/stacbuilder/terracatalog.py
+++ b/stacbuilder/terracatalog.py
@@ -459,6 +459,7 @@ def get_products_as_dataframe(self) -> gpd.GeoDataFrame:
 
         # HACK parameters to split up calculation into smaller chunks
         slice_length = 100  # limits the active threads to prevent OOM errors
+        limit_chunks = False
         min_chunk, max_chunk = 0, 1000  # limits the number of chunks we process
 
         catalogue = self.get_tcc_catalogue()
@@ -481,7 +482,7 @@ def get_products_as_dataframe(self) -> gpd.GeoDataFrame:
                 if limit_reached:
                     break
 
-                if not (min_chunk <= query_slots_iterator < max_chunk):
+                if limit_chunks and (not (min_chunk <= query_slots_iterator < max_chunk)):
                     # This is a temporary measure to prevent OOM errors.
                     # We should find a better way to limit the number of products we process.
                     self._log_progress_message(
@@ -509,11 +510,12 @@ def get_products_as_dataframe(self) -> gpd.GeoDataFrame:
                     )
 
                     for future in concurrent.futures.as_completed(futures):
-                        new_products = future.result()
+                        future_result = future.result()
+                        new_products = [p for p in future_result if p.id not in product_ids]
+
                         if not new_products:
                             # Avoid doing unnecessary work, might add empty dataframes to the total dataframe.
                             continue
-                        new_products = [p for p in new_products if p.id not in product_ids]
                         self._log_progress_message(f"Number of new products {len(new_products)}", level=logging.DEBUG)
                         product_ids.update([p.id for p in new_products])
                         self._log_progress_message(f"Number of unique products {len(product_ids)}", level=logging.DEBUG)
diff --git a/tests/stacapi/test_endpoints.py b/tests/stacapi/test_endpoints.py
index 78b9c0f..3770bea 100644
--- a/tests/stacapi/test_endpoints.py
+++ b/tests/stacapi/test_endpoints.py
@@ -164,10 +164,6 @@ def test_get(self, requests_mock, empty_collection: Collection, collection_endpt
         assert empty_collection.to_dict() == actual_collection.to_dict()
         assert m.called
 
-    @pytest.mark.xfail(reason="Test not implemented yet")
-    def test_get_all(self, requests_mock, empty_collection: Collection, collection_endpt: CollectionsEndpoint):
-        assert False, "Test not implemented yet"
-
     def test_create(self, requests_mock, empty_collection: Collection, collection_endpt: CollectionsEndpoint):
         m = requests_mock.post(str(self.BASE_URL / "collections"), json=empty_collection.to_dict(), status_code=201)
         response_json = collection_endpt.create(empty_collection)