Skip to content

Commit

Permalink
Merge branch 'devel'
Browse files Browse the repository at this point in the history
  • Loading branch information
cdbethune committed Jan 16, 2025
2 parents df52974 + da86810 commit 0faae67
Show file tree
Hide file tree
Showing 36 changed files with 462 additions and 158 deletions.
10 changes: 10 additions & 0 deletions cdr/deploy/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@ FROM python:3.10-slim
RUN apt-get update && apt-get install -y git build-essential ffmpeg libsm6 libxext6
RUN apt-get install -y libgdal-dev g++ --no-install-recommends && apt-get clean -y

# setup DOI root cert
COPY certs/DOIRootCA2.crt /usr/local/share/ca-certificates
RUN chmod 644 /usr/local/share/ca-certificates/DOIRootCA2.crt && \
update-ca-certificates
ENV PIP_CERT="/etc/ssl/certs/ca-certificates.crt" \
SSL_CERT_FILE="/etc/ssl/certs/ca-certificates.crt" \
CURL_CA_BUNDLE="/etc/ssl/certs/ca-certificates.crt" \
REQUESTS_CA_BUNDLE="/etc/ssl/certs/ca-certificates.crt" \
AWS_CA_BUNDLE="/etc/ssl/certs/ca-certificates.crt"

RUN mkdir /app

COPY tasks tasks
Expand Down
4 changes: 3 additions & 1 deletion cdr/deploy/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ cp ../pyproject.toml cdr
cp -r ../../schema .
cp -r ../../tasks .
cp -r ../../util .
cp -r ../../certs .

# run the build with the platform argument if provided, otherwise build for the host architecture
platform=${1:-}
Expand All @@ -21,4 +22,5 @@ fi
rm -rf cdr
rm -rf schema
rm -rf tasks
rm -rf util
rm -rf util
rm -rf certs
2 changes: 1 addition & 1 deletion cdr/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "lara-cdr"
version = "0.1.0"
version = "1.0.0"
description = "LARA CDR integration supporting both one-off processing and webhook event-driven processing"
readme = "README.md"
dependencies = ["jsons", "flask", "lara-tasks", "mypy-boto3-s3", "ngrok", "pyproj"]
Expand Down
15 changes: 12 additions & 3 deletions cdr/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import ngrok
import os
import requests
import secrets

from datetime import datetime
from typing import Any, Dict, List, Optional
Expand Down Expand Up @@ -58,6 +59,7 @@ class Settings:
output: str
callback_secret: str
callback_url: str
callback_token: str
registration_id: Dict[str, str] = {}
rabbitmq_host: str
sequence: List[str] = []
Expand Down Expand Up @@ -86,6 +88,12 @@ def process_cdr_event():
evt = request.get_json(force=True)
logger.info(f"event data received {evt['event']}")

# Check the token in the header
auth_token = request.headers.get("Authorization")
if auth_token != f"Bearer {settings.callback_token}":
logger.error(f"Invalid callback token {auth_token}")
return Response("Unauthorized", status=401)

map_event: Optional[MapEventPayload] = None
try:
# handle event directly or create lara request
Expand Down Expand Up @@ -177,9 +185,8 @@ def register_cdr_system():
"version": system_version,
"callback_url": settings.callback_url,
"webhook_secret": settings.callback_secret,
# Leave blank if callback url has no auth requirement
# "auth_header": "",
# "auth_token": "",
"auth_header": "Authorization",
"auth_token": f"Bearer {settings.callback_token}",
"events": events,
}

Expand Down Expand Up @@ -429,6 +436,7 @@ def main():
"--cdr_callback_secret", type=str, default=DEFAULT_CDR_CALLBACK_SECRET
)
parser.add_argument("--cdr_callback_url", type=str, default=None)
parser.add_argument("--cdr_callback_token", type=str, default=None)
parser.add_argument("--app_port", type=int, default=DEFAULT_APP_PORT)
parser.add_argument("--host", type=str, default="localhost")
parser.add_argument("--rabbit_port", type=int, default=5672)
Expand All @@ -452,6 +460,7 @@ def main():
settings.cdr_host = p.cdr_host
settings.cog_host = p.cog_host
settings.callback_secret = p.cdr_callback_secret
settings.callback_token = p.cdr_callback_token or secrets.token_urlsafe(32)
settings.sequence = p.sequence
settings.replay_start = p.replay_start if hasattr(p, "replay_start") else None
settings.replay_end = p.replay_end if hasattr(p, "replay_end") else None
Expand Down
10 changes: 10 additions & 0 deletions cdr_writer/deploy/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@ FROM python:3.10-slim
RUN apt-get update && apt-get install -y git build-essential ffmpeg libsm6 libxext6
RUN apt-get install -y libgdal-dev g++ --no-install-recommends && apt-get clean -y

# setup DOI root cert
COPY certs/DOIRootCA2.crt /usr/local/share/ca-certificates
RUN chmod 644 /usr/local/share/ca-certificates/DOIRootCA2.crt && \
update-ca-certificates
ENV PIP_CERT="/etc/ssl/certs/ca-certificates.crt" \
SSL_CERT_FILE="/etc/ssl/certs/ca-certificates.crt" \
CURL_CA_BUNDLE="/etc/ssl/certs/ca-certificates.crt" \
REQUESTS_CA_BUNDLE="/etc/ssl/certs/ca-certificates.crt" \
AWS_CA_BUNDLE="/etc/ssl/certs/ca-certificates.crt"

RUN mkdir /app

COPY tasks tasks
Expand Down
4 changes: 3 additions & 1 deletion cdr_writer/deploy/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ cp ../pyproject.toml cdr_writer
cp -r ../../schema .
cp -r ../../tasks .
cp -r ../../util .
cp -r ../../certs .

# run the build with the platform argument if provided, otherwise build for the host architecture
platform=${1:-}
Expand All @@ -21,4 +22,5 @@ fi
rm -rf cdr_writer
rm -rf schema
rm -rf tasks
rm -rf util
rm -rf util
rm -rf certs
2 changes: 1 addition & 1 deletion cdr_writer/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "lara-cdr-writer"
version = "0.1.0"
version = "1.0.0"
description = "LARA CDR writer"
readme = "README.md"
dependencies = ["jsons", "lara-tasks", "mypy-boto3-s3", "pyproj"]
Expand Down
6 changes: 4 additions & 2 deletions cdr_writer/write_result_subscriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def _process_lara_result(
body_decoded = json.loads(body.decode())
result = RequestResult.model_validate(body_decoded)
logger.info(
f"processing result for request {result.id} of type {result.output_type}"
f"processing result for request {result.id} for {result.image_id} of type {result.output_type}"
)

# add metric of job starting
Expand Down Expand Up @@ -122,7 +122,9 @@ def _process_lara_result(
)

except Exception as e:
logger.exception(f"Error processing lara result: {e}")
logger.exception(e)
if self._metrics_url != "":
requests.post(self._metrics_url + "/counter/writer_errored?step=1")

logger.info("result processing finished")

Expand Down
56 changes: 56 additions & 0 deletions certs/DOIRootCA2.crt
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
-----BEGIN CERTIFICATE-----
MIIJ+jCCB+KgAwIBAgIQSeV7u0WVHrFBqkjcKiEnEzANBgkqhkiG9w0BAQsFADAV
MRMwEQYDVQQDEwpET0lSb290Q0EyMB4XDTE2MDQyNjE4MjE1MFoXDTM2MDQyNjE4
MjE1MFowFTETMBEGA1UEAxMKRE9JUm9vdENBMjCCAiIwDQYJKoZIhvcNAQEBBQAD
ggIPADCCAgoCggIBANTHy0AvCFT0CZOsktj3kpFfFJgrhiS5haBK7DvYYpbZoaWa
jOpldnvoqnd1bOJUJ9jUwxZERo27FJHZ8HSdU8ac63hdK1N6t1OrlQOjgs/Kn0LY
b7xOf6iDBfhdhXcjpq2KZBzVi0tWuyBPhDmZyTLqZ5UREy1mV3/p1pJ3Hx5lPziH
Fid+wILxe2fk+N1ExY/GA+cgdsSCP4kp4aBxXLNTq+oMu/NahyF+NmcbqUk3xh+v
U1UA7h35b8kjd/3Kx2Bv1EDveWbzaS7sn8T3OVnU6n9UObUcqaoJOXF3PDdQqPIh
YVLT8/s15YaUznTe7jc46YjnqZPaJJGbDir/m03QR8qWi0qMhLjTTfPYe4DFYa27
4e8sTeK0DTSpUMBhQqagRQEFcYRd6QaZ4wfj+8zwX7EUha9jOKrMALvxRurkEqsa
m9NZntaHlSkRjQAZ562TUYowBpb841O0v0c9+i8SM6D9kRVV+NIj3StPNkQG7qlc
+PhF5YA4jYAifZ7AtWraLeopTPonfX0avWbIt5ryy4Y+sISwsg4HZ+rdNrJq9MUu
YWDbdO/lRclnFJ64VmD0rH7Fuef7CDiQvwn0NPJHQoU6h/zHnfOEIlh44h+0uy+R
lEp41vrb9mA/a7ZZEohcJroQ6JL1Z8b+KLY47ryuqneLklCVTGbMNGZxusOtAgMB
AAGjggVEMIIFQDALBgNVHQ8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4E
FgQUv4YryvNsbT5fHDtOTtiN52rHak8wEAYJKwYBBAGCNxUBBAMCAQAwggTtBgNV
HSAEggTkMIIE4DCCAg8GCWCGSAFlAwIBEzCCAgAwMAYIKwYBBQUHAgEWJGh0dHA6
Ly9wa2kyLmRvaS5uZXQvbGVnYWxwb2xpY3kuYXNwADCCAcoGCCsGAQUFBwICMIIB
vB6CAbgAQwBlAHIAdABpAGYAaQBjAGEAdABlACAAaQBzAHMAdQBlAGQAIABiAHkA
IAB0AGgAZQAgAEQAZQBwAGEAcgB0AG0AZQBuAHQAIABvAGYAIAB0AGgAZQAgAEkA
bgB0AGUAcgBpAG8AcgAgAGEAcgBlACAAbwBuAGwAeQAgAGYAbwByACAAaQBuAHQA
ZQByAG4AYQBsACAAdQBuAGMAbABhAHMAcwBpAGYAaQBlAGQAIABVAFMAIABHAG8A
dgBlAHIAbgBtAGUAbgB0ACAAdQBzAGUAIABhAGwAbAAgAG8AdABoAGUAcgAgAHUA
cwBlACAAaQBzACAAcAByAG8AaABpAGIAaQB0AGUAZAAuACAAVQBuAGEAdQB0AGgA
bwByAGkAegBlAGQAIAB1AHMAZQAgAG0AYQB5ACAAcwB1AGIAagBlAGMAdAAgAHYA
aQBvAGwAYQB0AG8AcgBzACAAdABvACAAYwByAGkAbQBpAG4AYQBsACwAIABjAGkA
dgBpAGwAIABhAG4AZAAvAG8AcgAgAGQAaQBzAGMAaQBwAGwAaQBuAGEAcgB5ACAA
YQBjAHQAaQBvAG4ALjCCAskGCmCGSAFlAwIBEwEwggK5MDUGCCsGAQUFBwIBFilo
dHRwOi8vcGtpMi5kb2kubmV0L2xpbWl0ZWR1c2Vwb2xpY3kuYXNwADCCAn4GCCsG
AQUFBwICMIICcB6CAmwAVQBzAGUAIABvAGYAIAB0AGgAaQBzACAAQwBlAHIAdABp
AGYAaQBjAGEAdABlACAAaQBzACAAbABpAG0AaQB0AGUAZAAgAHQAbwAgAEkAbgB0
AGUAcgBuAGEAbAAgAEcAbwB2AGUAcgBuAG0AZQBuAHQAIAB1AHMAZQAgAGIAeQAg
AC8AIABmAG8AcgAgAHQAaABlACAARABlAHAAYQByAHQAbQBlAG4AdAAgAG8AZgAg
AHQAaABlACAASQBuAHQAZQByAGkAbwByACAAbwBuAGwAeQAuACAARQB4AHQAZQBy
AG4AYQBsACAAdQBzAGUAIABvAHIAIAByAGUAYwBlAGkAcAB0ACAAbwBmACAAdABo
AGkAcwAgAEMAZQByAHQAaQBmAGkAYwBhAHQAZQAgAHMAaABvAHUAbABkACAAbgBv
AHQAIABiAGUAIAB0AHIAdQBzAHQAZQBkAC4AIABBAGwAbAAgAHMAdQBzAHAAZQBj
AHQAZQBkACAAbQBpAHMAdQBzAGUAIABvAHIAIABjAG8AbQBwAHIAbwBtAGkAcwBl
ACAAbwBmACAAdABoAGkAcwAgAGMAZQByAHQAaQBmAGkAYwBhAHQAZQAgAHMAaABv
AHUAbABkACAAYgBlACAAcgBlAHAAbwByAHQAZQBkACAAaQBtAG0AZQBkAGkAYQB0
AGUAbAB5ACAAdABvACAAYQAgAEQAZQBwAGEAcgB0AG0AZQBuAHQAIABvAGYAIAB0
AGgAZQAgAEkAbgB0AGUAcgBpAG8AcgAgAFMAZQBjAHUAcgBpAHQAeQAgAE8AZgBm
AGkAYwBlAHIALjANBgkqhkiG9w0BAQsFAAOCAgEAF/q4Z2mRTIYJMu5mzlWsbV4o
gGQJ9YcSdUZRq2vzINJCpGDXstAIE81Pfz/Fna98KOkjEB8XGXVUGQf07c9ylGJS
XFoBwcN8GgOuys5iiP9/yd2yLHB8rBb8pu9RForl9RoTsYY8nFuOOtl9o2EfB/1O
PbRYkfHhhqrfvvHdvDKWPmT+ZhaliWJrg2my432yqBqPePjqMZSl4sxiPYi9WicU
UWYdJpxQlys3igICD4GXOcSh316jfaqfN8+9jps+lgO7rqOA41B8fU9Gwi4B8jjx
Tw0pgvbuebwwL5IQwrsGcA8rFfRPR6CaSY5v3XXqTMbCXyYjNK1/44I9MoFFaFPc
e3cqZ5cQ+lCoW3UE0SLNZb3YKh28ES/Gi5CO0Bq5P8QVLRJQL5xOaSzV9blszHv5
okR+lkSsVo2QzR/mzFD7lXtwznkd/uak0hripTB7MtZenBzoQ8zAgjgw5TXjRSAZ
goWiJTAg+YTKclhJ7Cfg/m4XeCxzNgz/pU1XEdBF2Ngvp3C9M5CSBcqzb234uiFF
SyvJl/6erDTkQ5dLrnSnsJIw1ZS/XG/Fi41u8il0piLc5depTLn9qiWf29BRBEtG
xwFKSmqlRWsClj/zADirBTjcctw7ajPMkRpebgn+Bzv1eWDx4+OolQuR/a45644Q
GHVtIa/kVEl2DE0WcUw=
-----END CERTIFICATE-----
2 changes: 1 addition & 1 deletion deploy/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ Once running, the system will respond to maps by being added to the CDR by execu

## Deployment Recommendations

The system has been tested on an AWS `m7g.2xlarge node` (32GB RAM, 8x vCPU, NO GPU), which results in processing times of 2-3 minutes per individual map. A node with 16GB of RAM was tested, but larger maps failed to process due to out-of-memory errors.
The system has been tested on an AWS `m6i.2xlarge` node (32GB RAM, 8x vCPU, NO GPU), which results in processing times of 2-3 minutes per individual map. A node with 16GB of RAM was tested, but larger maps failed to process due to out-of-memory errors.

The S3 bucket used for the `workdir` and `imagedir` storage should have an expiry period set (24 hrs. recommended). The data stored during processing is not needed after results are written to the CDR, but the application **does not** clean up these files itself.

2 changes: 1 addition & 1 deletion pipelines/geo_referencing/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "lara-georeferencing"
version = "0.1.1"
version = "1.0.0"
description = "LARA georeferencing pipeline and server"
readme = "README.md"
dependencies = ["jsons", "flask", "lara-tasks", "mypy-boto3-s3"]
Expand Down
7 changes: 6 additions & 1 deletion pipelines/geo_referencing/run_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,12 @@ def run_pipeline(parsed, input_data: ImageFileInputIterator):
input = create_input(raster_id, image, query_path)

logger.info(f"running pipeline {pipeline.id}")
output = pipeline.run(input)
try:
output = pipeline.run(input)
except Exception as e:
logger.exception(e)
continue

logger.info(f"done pipeline {pipeline.id}\n\n")

# store the baseline georeferencing results
Expand Down
2 changes: 1 addition & 1 deletion pipelines/metadata_extraction/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "lara-map-metadata-extraction"
version = "0.1.0"
version = "1.0.0"
description = "LARA map metadata extraction pipeline and server"
readme = "README.md"
dependencies = ["flask", "lara-tasks", "mypy-boto3-s3"]
Expand Down
6 changes: 5 additions & 1 deletion pipelines/metadata_extraction/run_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,11 @@ def main():
# run the extraction pipeline
for doc_id, image in input:
image_input = PipelineInput(image=image, raster_id=doc_id)
results = pipeline.run(image_input)
try:
results = pipeline.run(image_input)
except Exception as e:
logger.exception(e)
continue

# write the results out to the file system or s3 bucket
for output_type, output_data in results.items():
Expand Down
Loading

0 comments on commit 0faae67

Please sign in to comment.