Skip to content

Commit

Permalink
fixed env files
Browse files Browse the repository at this point in the history
  • Loading branch information
VictorVerhaert committed Apr 24, 2024
1 parent d8572ff commit 856ca41
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 9 deletions.
4 changes: 2 additions & 2 deletions conda-environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ dependencies:
- click=8.1.*
- geopandas=0.14.*
- openeo=0.26.*
- psutil
- pyarrow=14.0.*
- pydantic=2.5.*
- pyproj=3.6.*
Expand All @@ -17,12 +18,11 @@ dependencies:
- shapely=2.0.*
- stac-validator=3.3.*
- stactools=0.5.*
- psutil
- ipykernel
- jupyter
- pytest
- pip
- yarl=1.9.*
- pip
- pip:
- --extra-index-url https://artifactory.vgt.vito.be/artifactory/api/pypi/python-packages/simple
- terracatalogueclient==0.1.14
Expand Down
1 change: 1 addition & 0 deletions requirements/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ click==8.1.*
dynaconf==3.2.*
geopandas==0.14.*
openeo==0.26.*
psutil
pyarrow==14.0.*
pydantic==2.5.*
pyproj==3.6.*
Expand Down
8 changes: 5 additions & 3 deletions stacbuilder/terracatalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,7 @@ def get_products_as_dataframe(self) -> gpd.GeoDataFrame:

# HACK parameters to split up calculation into smaller chunks
slice_length = 100 # limits the active threads to prevent OOM errors
limit_chunks = False
min_chunk, max_chunk = 0, 1000 # limits the number of chunks we process

catalogue = self.get_tcc_catalogue()
Expand All @@ -481,7 +482,7 @@ def get_products_as_dataframe(self) -> gpd.GeoDataFrame:
if limit_reached:
break

if not (min_chunk <= query_slots_iterator < max_chunk):
if limit_chunks and (not (min_chunk <= query_slots_iterator < max_chunk)):
# This is a temporary measure to prevent OOM errors.
# We should find a better way to limit the number of products we process.
self._log_progress_message(
Expand Down Expand Up @@ -509,11 +510,12 @@ def get_products_as_dataframe(self) -> gpd.GeoDataFrame:
)

for future in concurrent.futures.as_completed(futures):
new_products = future.result()
future_result = future.result()
new_products = [p for p in future_result if p.id not in product_ids]

if not new_products:
# Avoid doing unnecessary work, might add empty dataframes to the total dataframe.
continue
new_products = [p for p in new_products if p.id not in product_ids]
self._log_progress_message(f"Number of new products {len(new_products)}", level=logging.DEBUG)
product_ids.update([p.id for p in new_products])
self._log_progress_message(f"Number of unique products {len(product_ids)}", level=logging.DEBUG)
Expand Down
4 changes: 0 additions & 4 deletions tests/stacapi/test_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,10 +164,6 @@ def test_get(self, requests_mock, empty_collection: Collection, collection_endpt
assert empty_collection.to_dict() == actual_collection.to_dict()
assert m.called

@pytest.mark.xfail(reason="Test not implemented yet")
def test_get_all(self, requests_mock, empty_collection: Collection, collection_endpt: CollectionsEndpoint):
assert False, "Test not implemented yet"

def test_create(self, requests_mock, empty_collection: Collection, collection_endpt: CollectionsEndpoint):
m = requests_mock.post(str(self.BASE_URL / "collections"), json=empty_collection.to_dict(), status_code=201)
response_json = collection_endpt.create(empty_collection)
Expand Down

0 comments on commit 856ca41

Please sign in to comment.