Skip to content

Commit

Permalink
fix: catch invalid scales
Browse files Browse the repository at this point in the history
  • Loading branch information
amfage committed Aug 31, 2023
1 parent 457f78b commit f6275a2
Showing 1 changed file with 25 additions and 11 deletions.
36 changes: 25 additions & 11 deletions tools/nz-imagery-collection-data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
import subprocess
import yaml
from dateutil import parser, tz
from enum import Enum
from typing import Dict, List, Set, Union

from typing import Dict, List, TypedDict, Union
# Scale values that _get_scale accepts; any other scale parsed from an item href is recorded as "invalid scale".
valid_scales: List[str] = ["500", "1000", "2000", "5000", "10000", "50000"]

def _run_command(command: List[str], cwd: Union[str, None]) -> "subprocess.CompletedProcess[bytes]":
try:
Expand All @@ -22,8 +24,25 @@ def _run_command(command: List[str], cwd: Union[str, None]) -> "subprocess.Compl
print(proc.stderr)
return proc

def _get_scale():
print("do stuff")
def _get_scale(links: List[Dict[str, str]]) -> str:
    """Return the single scale shared by all item links of a collection.

    For every link whose "rel" is "item", the scale is read from the second
    "_"-separated field of its "href", with any file extension removed, and
    must be one of the module-level ``valid_scales``.

    Args:
        links: Link objects, each carrying a "rel" key; only "item" links
            are inspected.

    Returns:
        The scale string when the item links yield exactly one distinct
        valid scale; otherwise the literal "invalid scale". Every failure
        also appends a message to the module-level ``data_errors`` list.
    """
    scales: List[str] = []
    for link in links:
        if link["rel"] != "item":
            continue
        # Only the href parsing is expected to fail, so the try covers just
        # that expression and catches the specific errors it can raise. A
        # bare except would also swallow KeyboardInterrupt and SystemExit.
        try:
            scale = os.path.splitext(link["href"].split("_")[1])[0]
        except (KeyError, IndexError, AttributeError, TypeError):
            data_errors.append("cannot retrieve scale: invalid file format")
            return "invalid scale"
        # Stop at the first unrecognised scale rather than collecting more.
        if scale not in valid_scales:
            data_errors.append("invalid scale")
            return "invalid scale"
        if scale not in scales:
            scales.append(scale)
    # Zero item links, or item links with differing scales, are both errors.
    if len(scales) != 1:
        data_errors.append(f"{len(scales)} scales found, should be only 1")
        return "invalid scale"
    return scales[0]

def _format_date(date: str) -> datetime:
utc_tz = tz.gettz("UTC")
Expand All @@ -32,6 +51,7 @@ def _format_date(date: str) -> datetime:
try:
utc_time = parser.parse(date).replace(tzinfo=utc_tz)
except parser.ParserError as err:
data_errors.append(err)
raise Exception(f"Not a valid date: {err}") from err

nz_time: datetime = utc_time.astimezone(nz_tz)
Expand Down Expand Up @@ -80,14 +100,15 @@ def _tmp_target_edit(target: str) -> str:

for link in catalog_json["links"]:
if link["rel"] == "child":
data_errors = []
collection_link = os.path.abspath("./data/imagery-stac/" + link["href"])
with open(collection_link, encoding="utf-8") as collection:
collection_json = json.loads(collection.read())
source = os.path.join("s3://linz-imagery/", link["href"].strip("./"))
target = _tmp_target_edit(source)
start_datetime = _format_date(collection_json["extent"]["temporal"]["interval"][0][0])
end_datetime = _format_date(collection_json["extent"]["temporal"]["interval"][0][1])
# scale = _get_scale(collection_json["links"])
scale = _get_scale(collection_json["links"])

params = {
"source": source,
Expand All @@ -114,10 +135,3 @@ def _tmp_target_edit(target: str) -> str:
# file_name = link["href"].split("/")[-4:-2]
# file_name = f"{file_name[0]}-{file_name[1]}"
# formatted_file_name = file_name.replace("_", "-").replace(".", "-")



# csv_writer.writerow([file_name, collection_json["id"], collection_json["title"], collection_json["description"], collection_json["providers"]])
# with open('collection_ids.csv', 'w', newline='') as csvfile:
# csv_writer = csv.writer(csvfile, delimiter=',',
# quotechar='"', quoting=csv.QUOTE_MINIMAL)

0 comments on commit f6275a2

Please sign in to comment.