fixed env files

VitoTAP · Apr 24, 2024 · 856ca41 · 856ca41
1 parent d8572ff
commit 856ca41
Show file tree

Hide file tree

Showing 4 changed files with 8 additions and 9 deletions.
diff --git a/conda-environment.yaml b/conda-environment.yaml
@@ -6,6 +6,7 @@ dependencies:
   - click=8.1.*
   - geopandas=0.14.*
   - openeo=0.26.*
+  - psutil
   - pyarrow=14.0.*
   - pydantic=2.5.*
   - pyproj=3.6.*
@@ -17,12 +18,11 @@ dependencies:
   - shapely=2.0.*
   - stac-validator=3.3.*
   - stactools=0.5.*
-  - psutil
   - ipykernel
   - jupyter
   - pytest
-  - pip
   - yarl=1.9.*
+  - pip
   - pip:
     - --extra-index-url https://artifactory.vgt.vito.be/artifactory/api/pypi/python-packages/simple
     - terracatalogueclient==0.1.14

diff --git a/requirements/requirements.txt b/requirements/requirements.txt
@@ -4,6 +4,7 @@ click==8.1.*
 dynaconf==3.2.*
 geopandas==0.14.*
 openeo==0.26.*
+psutil
 pyarrow==14.0.*
 pydantic==2.5.*
 pyproj==3.6.*

diff --git a/stacbuilder/terracatalog.py b/stacbuilder/terracatalog.py
@@ -459,6 +459,7 @@ def get_products_as_dataframe(self) -> gpd.GeoDataFrame:
 
         # HACK parameters to split up calculation into smaller chunks
         slice_length = 100  # limits the active threads to prevent OOM errors
+        limit_chunks = False
         min_chunk, max_chunk = 0, 1000  # limits the number of chunks we process
 
         catalogue = self.get_tcc_catalogue()
@@ -481,7 +482,7 @@ def get_products_as_dataframe(self) -> gpd.GeoDataFrame:
                 if limit_reached:
                     break
 
-                if not (min_chunk <= query_slots_iterator < max_chunk):
+                if limit_chunks and (not (min_chunk <= query_slots_iterator < max_chunk)):
                     # This is a temporary measure to prevent OOM errors.
                     # We should find a better way to limit the number of products we process.
                     self._log_progress_message(
@@ -509,11 +510,12 @@ def get_products_as_dataframe(self) -> gpd.GeoDataFrame:
                     )
 
                     for future in concurrent.futures.as_completed(futures):
-                        new_products = future.result()
+                        future_result = future.result()
+                        new_products = [p for p in future_result if p.id not in product_ids]
+
                         if not new_products:
                             # Skip empty results: this avoids unnecessary work and avoids adding empty dataframes to the total dataframe.
                             continue
-                        new_products = [p for p in new_products if p.id not in product_ids]
                         self._log_progress_message(f"Number of new products {len(new_products)}", level=logging.DEBUG)
                         product_ids.update([p.id for p in new_products])
                         self._log_progress_message(f"Number of unique products {len(product_ids)}", level=logging.DEBUG)

diff --git a/tests/stacapi/test_endpoints.py b/tests/stacapi/test_endpoints.py
@@ -164,10 +164,6 @@ def test_get(self, requests_mock, empty_collection: Collection, collection_endpt
         assert empty_collection.to_dict() == actual_collection.to_dict()
         assert m.called
 
-    @pytest.mark.xfail(reason="Test not implemented yet")
-    def test_get_all(self, requests_mock, empty_collection: Collection, collection_endpt: CollectionsEndpoint):
-        assert False, "Test not implemented yet"
-
     def test_create(self, requests_mock, empty_collection: Collection, collection_endpt: CollectionsEndpoint):
         m = requests_mock.post(str(self.BASE_URL / "collections"), json=empty_collection.to_dict(), status_code=201)
         response_json = collection_endpt.create(empty_collection)