fix: catch invalid scales

linz · Aug 31, 2023 · f6275a2 · f6275a2
1 parent 457f78b
commit f6275a2
Showing 1 changed file with 25 additions and 11 deletions.
diff --git a/tools/nz-imagery-collection-data.py b/tools/nz-imagery-collection-data.py
@@ -4,8 +4,10 @@
 import subprocess
 import yaml
 from dateutil import parser, tz
+from enum import Enum
+from typing import Dict, List, Set, Union
 
-from typing import Dict, List, TypedDict, Union
+valid_scales: List[str] = ["500", "1000", "2000", "5000", "10000", "50000"]
 
 def _run_command(command: List[str], cwd: Union[str, None]) -> "subprocess.CompletedProcess[bytes]":
     try:
@@ -22,8 +24,25 @@ def _run_command(command: List[str], cwd: Union[str, None]) -> "subprocess.Compl
         print(proc.stderr)
     return proc
 
-def _get_scale():
-    print("do stuff")
+def _get_scale(links: List[Dict[str, str]]) -> str:
+    """Return the one scale shared by all "item" links, else record an error and return "invalid scale"."""
+    scales: List[str] = []  # distinct valid scales, in first-seen order
+    for link in links:
+        if link["rel"] == "item":
+            try:  # keep the try minimal: only the href parsing is expected to fail
+                scale = os.path.splitext(link["href"].split("_")[1])[0]  # 2nd "_" token, extension dropped
+            except (KeyError, IndexError, AttributeError):  # no href / no "_" token / href not a str
+                data_errors.append("cannot retrieve scale: invalid file format")
+                return "invalid scale"
+            if scale not in valid_scales:
+                data_errors.append("invalid scale")
+                return "invalid scale"
+            if scale not in scales:
+                scales.append(scale)
+    if len(scales) != 1:  # zero item links, or item links that disagree on scale
+        data_errors.append(f"{len(scales)} scales found, should be only 1")
+        return "invalid scale"
+    return scales[0]
 
 def _format_date(date: str) -> datetime:
     utc_tz = tz.gettz("UTC")
@@ -32,6 +51,7 @@ def _format_date(date: str) -> datetime:
     try:
         utc_time = parser.parse(date).replace(tzinfo=utc_tz)
     except parser.ParserError as err:
+        data_errors.append(err)
         raise Exception(f"Not a valid date: {err}") from err
 
     nz_time: datetime = utc_time.astimezone(nz_tz)
@@ -80,14 +100,15 @@ def _tmp_target_edit(target: str) -> str:
 
 for link in catalog_json["links"]:
     if link["rel"] == "child":
+        data_errors = []
         collection_link = os.path.abspath("./data/imagery-stac/" + link["href"])
         with open(collection_link, encoding="utf-8") as collection:
             collection_json = json.loads(collection.read())
             source = os.path.join("s3://linz-imagery/", link["href"].strip("./"))
             target = _tmp_target_edit(source)
             start_datetime = _format_date(collection_json["extent"]["temporal"]["interval"][0][0])
             end_datetime = _format_date(collection_json["extent"]["temporal"]["interval"][0][1])
-            # scale = _get_scale(collection_json["links"])
+            scale = _get_scale(collection_json["links"])
 
             params = {
                 "source": source,
@@ -114,10 +135,3 @@ def _tmp_target_edit(target: str) -> str:
             # file_name = link["href"].split("/")[-4:-2]
             # file_name = f"{file_name[0]}-{file_name[1]}"
             # formatted_file_name = file_name.replace("_", "-").replace(".", "-")
-
-
-
-#            csv_writer.writerow([file_name, collection_json["id"], collection_json["title"], collection_json["description"], collection_json["providers"]])
-# with open('collection_ids.csv', 'w', newline='') as csvfile:
-#     csv_writer = csv.writer(csvfile, delimiter=',',
-#                             quotechar='"', quoting=csv.QUOTE_MINIMAL)