diff --git a/configs-datasets/ANIN/ERA5-Land-monthly-averaged-data-v2/.gitignore b/configs-datasets/ANIN/ERA5-Land-monthly-averaged-data-v2/.gitignore index 71952e9..e6af844 100644 --- a/configs-datasets/ANIN/ERA5-Land-monthly-averaged-data-v2/.gitignore +++ b/configs-datasets/ANIN/ERA5-Land-monthly-averaged-data-v2/.gitignore @@ -1,3 +1,4 @@ STAC_wip/* STAC_publish/* openeo-test-out/* +results/* \ No newline at end of file diff --git a/configs-datasets/PEOPLE_EA/Landsat_three-annual_NDWI_v1/workflow.ipynb b/configs-datasets/PEOPLE_EA/Landsat_three-annual_NDWI_v1/workflow.ipynb index 310e44d..f8e0179 100644 --- a/configs-datasets/PEOPLE_EA/Landsat_three-annual_NDWI_v1/workflow.ipynb +++ b/configs-datasets/PEOPLE_EA/Landsat_three-annual_NDWI_v1/workflow.ipynb @@ -23,6 +23,7 @@ "# Collection configuration\n", "catalog_version = \"v0.5\"\n", "collection_config_path = Path(\"config-collection.json\")\n", + "overwrite = True\n", "\n", "# Input Paths\n", "tiff_input_path = Path(\"C:/Users/VERHAERV/WorkingDirs/data/PEOPLE_EA/Landsat_three-annual_NDWI_v1\")\n", @@ -39,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -125,64 +126,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Found 10 STAC items\n", - "First stac item:\n", - "{'assets': {'NDWI': {'description': 'Landsat NDWI',\n", - " 'eo:bands': [{'description': 'NDWI', 'name': 'NDWI'}],\n", - " 'file:size': 604265,\n", - " 'href': 'C:/Users/VERHAERV/WorkingDirs/data/PEOPLE_EA/Landsat_three-annual_NDWI_v1/2000/NDWI_2000_1_1.tif',\n", - " 'raster:bands': [{'data_type': 'float32'}],\n", - " 'roles': ['data'],\n", - " 'title': 'Landsat NDWI'}},\n", - " 'bbox': [-58.65728862678572,\n", - " 55.50073948839283,\n", - " -35.037744053124435,\n", - " 54.701421721384676],\n", - " 'geometry': {'coordinates': (((-58.65728862678572, 55.50073948839283),\n", - " (-58.65728862678572, 54.701421721384676),\n", - " (-35.037744053124435, 54.701421721384676),\n", - " (-35.037744053124435, 55.50073948839283),\n", - " (-58.65728862678572, 55.50073948839283)),),\n", - " 'type': 'Polygon'},\n", - " 'id': 'NDWI_2000_1_1',\n", - " 'links': [],\n", - " 'properties': {'created': '2024-02-13T10:38:18.333702Z',\n", - " 'datetime': '2000-01-01T00:00:00Z',\n", - " 'description': 'Landsat NDWI',\n", - " 'end_datetime': '2000-12-31T23:59:59Z',\n", - " 'eo:bands': [{'description': 'NDWI', 'name': 'NDWI'}],\n", - " 'product_version': '1.0.0',\n", - " 'proj:bbox': [665000.0, 4467700.0, 1689000.0, 5491700.0],\n", - " 'proj:epsg': 3035,\n", - " 'proj:geometry': {'coordinates': (((665000.0, 4467700.0),\n", - " (665000.0, 5491700.0),\n", - " (1689000.0, 5491700.0),\n", - " (1689000.0, 4467700.0),\n", - " (665000.0, 4467700.0)),),\n", - " 'type': 'Polygon'},\n", - " 'proj:shape': (10240, 10240),\n", - " 'proj:transform': [100.0,\n", - " 0.0,\n", - " 665000.0,\n", - " 0.0,\n", - " -100.0,\n", - " 5491700.0],\n", - " 'start_datetime': '2000-01-01T00:00:00Z'},\n", - " 'stac_extensions': ['https://stac-extensions.github.io/file/v2.0.0/schema.json',\n", - " 'https://stac-extensions.github.io/raster/v1.1.0/schema.json',\n", - " 'https://stac-extensions.github.io/projection/v1.1.0/schema.json',\n", - " 'https://stac-extensions.github.io/eo/v1.1.0/schema.json',\n", - " 'https://stac-extensions.github.io/classification/v1.0.0/schema.json',\n", - " 'https://stac-extensions.github.io/alternate-assets/v1.1.0/schema.json'],\n", - " 'stac_version': '1.0.0',\n", - " 'type': 'Feature'}\n" + "Found 0 STAC items\n" ] } ], @@ -192,33168 +143,48 @@ " collection_config_path=collection_config_path,\n", " glob=tiffs_glob,\n", " input_dir=tiff_input_path,\n", - " max_files=10\n", + " max_files=1\n", ")\n", "print(f\"Found {len(stac_items)} STAC items\")\n", "if failed_files: print(f\"Failed files: {failed_files}\")\n", - "print(\"First stac item:\")\n", - "pprint.pprint(stac_items[0].to_dict())" + "if len(stac_items) > 0:\n", + " print(\"First stac item:\")\n", + " stac_items[0]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "# build collection\n", - "build_collection(\n", + "# build grouped collection\n", + "build_grouped_collections(\n", " collection_config_path=collection_config_path,\n", " glob=tiffs_glob,\n", " input_dir=tiff_input_path,\n", " output_dir=test_output_path,\n", - " overwrite=False,\n", + " overwrite=overwrite,\n", ")" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 15, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Converting UTC timezones encoded as 'Z' to +00:00...\n", - "=== item_files_in: ===\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_1_1.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_1_2.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_1_3.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_1_4.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_1_5.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_1_6.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_1_7.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_2_1.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_2_2.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_2_3.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_2_4.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_2_5.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_2_6.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_2_7.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_3_1.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_3_2.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_3_3.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_3_4.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_3_5.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_3_6.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_3_7.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_4_1.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_4_2.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_4_3.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_4_4.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_4_5.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_4_6.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_4_7.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_5_1.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_5_2.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_5_3.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_5_4.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_5_5.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_5_6.json\n", - "c:\\Users\\VERHAERV\\WorkingDirs\\stac-catalog-builder\\configs-datasets\\PEOPLE_EA\\Landsat_three-annual_NDWI_v1\\results\\test\\v0.5\\Landsat_three-annual_NDWI_v1\\2000\\NDWI_2000_5_7.json\n", - "\n", - "\n" - ] - }, - { - "ename": "ValueError", - "evalue": "'c:\\\\Users\\\\VERHAERV\\\\WorkingDirs\\\\stac-catalog-builder\\\\configs-datasets\\\\PEOPLE_EA\\\\Landsat_three-annual_NDWI_v1\\\\results\\\\test\\\\v0.5\\\\Landsat_three-annual_NDWI_v1\\\\2000\\\\NDWI_2000_1_1.json' is not in the subpath of 'results\\\\test\\\\v0.5' OR one path is relative and the other is absolute.", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[3], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# build grouped collection\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[43mbuild_grouped_collections\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_config_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcollection_config_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mglob\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtiffs_glob\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtiff_input_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest_output_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43moverwrite\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 8\u001b[0m \u001b[43m)\u001b[49m\n", - "File \u001b[1;32m~\\WorkingDirs\\stac-catalog-builder\\stacbuilder\\commandapi.py:112\u001b[0m, in \u001b[0;36mbuild_grouped_collections\u001b[1;34m(collection_config_path, glob, input_dir, output_dir, overwrite, max_files, save_dataframe)\u001b[0m\n\u001b[0;32m 103\u001b[0m output_dir \u001b[38;5;241m=\u001b[39m Path(output_dir)\u001b[38;5;241m.\u001b[39mexpanduser()\u001b[38;5;241m.\u001b[39mabsolute()\n\u001b[0;32m 105\u001b[0m pipeline \u001b[38;5;241m=\u001b[39m GeoTiffPipeline\u001b[38;5;241m.\u001b[39mfrom_config(\n\u001b[0;32m 106\u001b[0m collection_config\u001b[38;5;241m=\u001b[39mcoll_cfg,\n\u001b[0;32m 107\u001b[0m file_coll_cfg\u001b[38;5;241m=\u001b[39mfile_coll_cfg,\n\u001b[0;32m 108\u001b[0m output_dir\u001b[38;5;241m=\u001b[39moutput_dir,\n\u001b[0;32m 109\u001b[0m overwrite\u001b[38;5;241m=\u001b[39moverwrite,\n\u001b[0;32m 110\u001b[0m )\n\u001b[1;32m--> 112\u001b[0m \u001b[43mpipeline\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuild_grouped_collections\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 114\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m save_dataframe:\n\u001b[0;32m 115\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m collection \u001b[38;5;129;01min\u001b[39;00m pipeline\u001b[38;5;241m.\u001b[39mcollection_groups\u001b[38;5;241m.\u001b[39mvalues():\n", - "File \u001b[1;32m~\\WorkingDirs\\stac-catalog-builder\\stacbuilder\\builder.py:1068\u001b[0m, in \u001b[0;36mGeoTiffPipeline.build_grouped_collections\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1066\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mbuild_grouped_collections\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m 1067\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreset()\n\u001b[1;32m-> 1068\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_asset_metadata_pipeline\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuild_grouped_collections\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32m~\\WorkingDirs\\stac-catalog-builder\\stacbuilder\\builder.py:925\u001b[0m, in \u001b[0;36mAssetMetadataPipeline.build_grouped_collections\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 923\u001b[0m coll_file \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_collection_builder\u001b[38;5;241m.\u001b[39mcollection_file\n\u001b[0;32m 924\u001b[0m post_processor \u001b[38;5;241m=\u001b[39m PostProcessSTACCollectionFile(collection_overrides\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_collection_config\u001b[38;5;241m.\u001b[39moverrides)\n\u001b[1;32m--> 925\u001b[0m \u001b[43mpost_processor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess_collection\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcoll_file\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32m~\\WorkingDirs\\stac-catalog-builder\\stacbuilder\\builder.py:587\u001b[0m, in \u001b[0;36mPostProcessSTACCollectionFile.process_collection\u001b[1;34m(self, collection_file, output_dir)\u001b[0m\n\u001b[0;32m 585\u001b[0m out_dir: Path \u001b[38;5;241m=\u001b[39m output_dir \u001b[38;5;129;01mor\u001b[39;00m collection_file\u001b[38;5;241m.\u001b[39mparent\n\u001b[0;32m 586\u001b[0m new_coll_file, _ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_create_post_proc_directory_structure(collection_file, out_dir)\n\u001b[1;32m--> 587\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_convert_timezones_encoded_as_z\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcollection_file\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout_dir\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 589\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcollection_overrides:\n\u001b[0;32m 590\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_override_collection_components(new_coll_file)\n", - "File \u001b[1;32m~\\WorkingDirs\\stac-catalog-builder\\stacbuilder\\builder.py:646\u001b[0m, in \u001b[0;36mPostProcessSTACCollectionFile._convert_timezones_encoded_as_z\u001b[1;34m(cls, collection_file, output_dir)\u001b[0m\n\u001b[0;32m 644\u001b[0m out_dir \u001b[38;5;241m=\u001b[39m output_dir \u001b[38;5;129;01mor\u001b[39;00m collection_file\u001b[38;5;241m.\u001b[39mparent\n\u001b[0;32m 645\u001b[0m item_paths \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mget_item_paths_for_coll_file(collection_file)\n\u001b[1;32m--> 646\u001b[0m \u001b[43mconv\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess_catalog\u001b[49m\u001b[43m(\u001b[49m\u001b[43min_coll_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcollection_file\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43min_item_paths\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mitem_paths\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mout_dir\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32m~\\WorkingDirs\\stac-catalog-builder\\stacbuilder\\timezoneformat.py:159\u001b[0m, in \u001b[0;36mTimezoneFormatConverter.process_catalog\u001b[1;34m(self, in_coll_path, in_item_paths, output_dir)\u001b[0m\n\u001b[0;32m 157\u001b[0m num_files \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(in_item_paths)\n\u001b[0;32m 158\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, item_path \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(in_item_paths):\n\u001b[1;32m--> 159\u001b[0m rel_path \u001b[38;5;241m=\u001b[39m \u001b[43mitem_path\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrelative_to\u001b[49m\u001b[43m(\u001b[49m\u001b[43min_coll_path\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparent\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 160\u001b[0m out_path \u001b[38;5;241m=\u001b[39m output_dir \u001b[38;5;241m/\u001b[39m rel_path\n\u001b[0;32m 161\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPROGRESS: converting STAC item \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mi\u001b[38;5;241m+\u001b[39m\u001b[38;5;241m1\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnum_files\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mitem_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mto:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mout_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[1;32mc:\\Users\\VERHAERV\\AppData\\Local\\miniconda3\\envs\\stac-catalog-builder\\Lib\\pathlib.py:730\u001b[0m, in \u001b[0;36mPurePath.relative_to\u001b[1;34m(self, *other)\u001b[0m\n\u001b[0;32m 728\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (root \u001b[38;5;129;01mor\u001b[39;00m drv) \u001b[38;5;28;01mif\u001b[39;00m n \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m cf(abs_parts[:n]) \u001b[38;5;241m!=\u001b[39m cf(to_abs_parts):\n\u001b[0;32m 729\u001b[0m formatted \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_format_parsed_parts(to_drv, to_root, to_parts)\n\u001b[1;32m--> 730\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{!r}\u001b[39;00m\u001b[38;5;124m is not in the subpath of \u001b[39m\u001b[38;5;132;01m{!r}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 731\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m OR one path is relative and the other is absolute.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 732\u001b[0m \u001b[38;5;241m.\u001b[39mformat(\u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28mself\u001b[39m), \u001b[38;5;28mstr\u001b[39m(formatted)))\n\u001b[0;32m 733\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_from_parsed_parts(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m, root \u001b[38;5;28;01mif\u001b[39;00m n \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 734\u001b[0m abs_parts[n:])\n", - "\u001b[1;31mValueError\u001b[0m: 'c:\\\\Users\\\\VERHAERV\\\\WorkingDirs\\\\stac-catalog-builder\\\\configs-datasets\\\\PEOPLE_EA\\\\Landsat_three-annual_NDWI_v1\\\\results\\\\test\\\\v0.5\\\\Landsat_three-annual_NDWI_v1\\\\2000\\\\NDWI_2000_1_1.json' is not in the subpath of 'results\\\\test\\\\v0.5' OR one path is relative and the other is absolute." - ] - } - ], + "outputs": [], "source": [ - "# build grouped collection\n", - "build_grouped_collections(\n", - " collection_config_path=collection_config_path,\n", - " glob=tiffs_glob,\n", - " input_dir=tiff_input_path,\n", - " output_dir=test_output_path,\n", - " overwrite=True,\n", + "# validate collection\n", + "validate_collection(\n", + " collection_file=test_output_path / \"collection.json\",\n", ")" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "
\n", - "
\n", - " \n", - "
\n", - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# show collection\n", "load_collection(\n", @@ -33361,15 +192,27 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Legacy\n", + "better to use grouped collections" + ] + }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# validate collection\n", - "validate_collection(\n", - " collection_file=test_output_path / \"collection.json\",\n", + "# build collection\n", + "build_collection(\n", + " collection_config_path=collection_config_path,\n", + " glob=tiffs_glob,\n", + " input_dir=tiff_input_path,\n", + " output_dir=test_output_path,\n", + " overwrite=overwrite,\n", ")" ] } diff --git a/stacbuilder/builder.py b/stacbuilder/builder.py index 55ec7f5..79a79d6 100644 --- a/stacbuilder/builder.py +++ b/stacbuilder/builder.py @@ -5,6 +5,7 @@ # Standard libraries import datetime as dt +from http.client import RemoteDisconnected import json import logging import shutil @@ -380,7 +381,12 @@ class STACCollectionBuilder: TODO: reduce this class to functions in the class: AssetMetadataPipeline """ - def __init__(self, collection_config: CollectionConfig, output_dir: Path, overwrite: bool = False) -> None: + def __init__( + self, + collection_config: CollectionConfig, + output_dir: Path, + overwrite: bool = False, + ) -> None: # Settings: these are just data, not components we delegate work to. self._collection_config = collection_config @@ -390,6 +396,7 @@ def __init__(self, collection_config: CollectionConfig, output_dir: Path, overwr + f"{output_dir=!r}" ) self._output_dir = Path(output_dir) + self._overwrite_output = overwrite # Internal temporary state @@ -434,22 +441,23 @@ def collection_file(self) -> Path: def collection(self) -> Optional[Collection]: return self._collection - def build_collection(self, stac_items: Iterable[Item]) -> None: + def build_collection( + self, + stac_items: Iterable[Item], + group: Optional[str | int] = None, + ) -> None: """Create and save the STAC collection.""" self.reset() self._stac_items = list(stac_items) or [] - self.create_collection() - self.save_collection() + self.create_empty_collection(group=group) + self.add_items_to_collection() - # We save before we validate, because when the validation fails we want - # to be able to inspect the incorrect result. - # self.validate_collection(self.collection) - - def create_collection( + def add_items_to_collection( self, ): - """Create a empty pystac.Collection for the dataset.""" - self._create_empty_collection() + """Fills the collection with stac items.""" + if self._collection is None: + raise InvalidOperation("Can not add items to a collection that has not been created yet.") item: Item for item in self._stac_items: @@ -461,13 +469,14 @@ def create_collection( self._collection.update_extent_from_items() + def normalize_hrefs(self, skip_unresolved: bool = False): layout_template = self._collection_config.layout_strategy_item_template strategy = TemplateLayoutStrategy(item_template=layout_template) - output_dir_str = self.output_dir.as_posix() - if output_dir_str.endswith("/"): - output_dir_str = output_dir_str[-1] - self._collection.normalize_hrefs(output_dir_str, strategy=strategy) + out_dir_str = self.output_dir.as_posix() + if out_dir_str.endswith("/"): + out_dir_str = out_dir_str[-1] + self._collection.normalize_hrefs(root_href=out_dir_str, strategy=strategy, skip_unresolved=skip_unresolved) def validate_collection(self, collection: Collection): """Run STAC validation on the collection.""" @@ -476,6 +485,8 @@ def validate_collection(self, collection: Collection): except STACValidationError as exc: print(exc) raise + except RemoteDisconnected: + print("Skipped this step validation due to RemoteDisconnected.") else: print(f"Collection valid: number of items validated: {num_items_validated}") @@ -496,13 +507,20 @@ def save_collection(self) -> None: def providers(self): return [p.to_provider() for p in self._collection_config.providers] - def _create_empty_collection(self) -> None: + def create_empty_collection(self, group: Optional[str | int] = None) -> None: """Creates a STAC Collection with no STAC items.""" - coll_config: CollectionConfig = self._collection_config + + if group: + id = coll_config.collection_id + f"_{group}" + title = coll_config.title + f" {group}" + else: + id = coll_config.collection_id + title = coll_config.title + collection = Collection( - id=coll_config.collection_id, - title=coll_config.title, + id=id, + title=title, description=coll_config.description, keywords=coll_config.keywords, providers=self.providers, @@ -612,9 +630,6 @@ def process_collection(self, collection_file: Path, output_dir: Optional[Path] = self._override_collection_components(data) self._save_collection_as_dict(data, new_coll_file) - # Check if the new file is still valid STAC. - self._validate_collection(Collection.from_file(new_coll_file)) - def is_in_place_processing(self, collection_file: Path, output_dir: Path) -> bool: return not output_dir or (output_dir.exists() and collection_file.parent.samefile(output_dir)) @@ -672,7 +687,6 @@ def _create_post_proc_directory_structure( shutil.copy2(old_path, new_path) def _override_collection_components(self, data: Dict[str, Any]) -> None: - print("Overriding components of STAC collection that we want to give some fixed value ...") overrides = self.collection_overrides for key, new_value in overrides.items(): @@ -686,17 +700,6 @@ def _override_collection_components(self, data: Dict[str, Any]) -> None: sub_dict = sub_dict[sub_key] sub_dict[deepest_key] = new_value - @staticmethod - def _validate_collection(collection: Collection): - """Run STAC validation on the collection.""" - try: - num_items_validated = collection.validate_all(recursive=True) - except STACValidationError as exc: - print(exc) - raise - else: - print(f"Collection valid: number of items validated: {num_items_validated}") - @staticmethod def _load_collection_as_dict(coll_file: Path) -> dict: with open(coll_file, "r") as f_in: @@ -882,7 +885,10 @@ def collect_stac_items(self): # Ignore the asset when the file was not a known asset type, for example it is # not a GeoTIFF or it is not one of the assets or bands we want to include. if stac_item: - stac_item.validate() + try: + stac_item.validate() + except RemoteDisconnected: + print(f"Skipped validation of {stac_item.get_self_href()} due to RemoteDisconnected.") yield stac_item # TODO: [simplify] [refactor] Merge this into collect_stac_items once it works well and it has tests. @@ -921,6 +927,8 @@ def build_collection(self): self.reset() self._collection_builder.build_collection(self.collect_stac_items()) + self._collection_builder.normalize_hrefs() + self._collection_builder.save_collection() self._collection = self._collection_builder.collection coll_file = self._collection_builder.collection_file @@ -936,13 +944,28 @@ def build_grouped_collections(self): if not self.uses_collection_groups: raise InvalidOperation(f"This instance of {self.__class__.__name__} does not have grouping.") + self._root_collection_builder = STACCollectionBuilder( + collection_config=self._collection_config, + overwrite=self._overwrite, + output_dir=self._output_base_dir, + ) + self._root_collection_builder.create_empty_collection() + for group, metadata_list in sorted(self.group_stac_items_by().items()): self._setup_internals(group=group) - self._collection_builder.build_collection(metadata_list) + self._collection_builder.build_collection(stac_items=metadata_list, group=group) + self._root_collection_builder.collection.add_child(self._collection_builder.collection) self._collection_groups[group] = self._collection_builder.collection - post_processor = PostProcessSTACCollectionFile(collection_overrides=self._collection_config.overrides) + self._root_collection_builder.normalize_hrefs() + self._root_collection_builder.collection.update_extent_from_items() + self._root_collection_builder.save_collection() + + # post process + post_processor = PostProcessSTACCollectionFile(collection_overrides=self._collection_config.overrides) + post_processor.process_collection(self._root_collection_builder.collection_file) + for group in self._collection_groups.keys(): coll_file = Path(self._collection_groups[group].self_href) post_processor.process_collection(coll_file)