Skip to content

Commit

Permalink
Add start of ASF s3 orbit fetching
Browse files Browse the repository at this point in the history
Addresses #63
  • Loading branch information
scottstanie committed Oct 3, 2024
1 parent 5bf6597 commit a3473cf
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 0 deletions.
117 changes: 117 additions & 0 deletions eof/_fetch_s3_bucket.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import logging
from functools import cache
from typing import Optional, Literal

import requests
import xml.etree.ElementTree as ET

# NOTE(review): basicConfig at import time configures the root logger for any
# program that imports this module; library code usually leaves logging
# configuration to the application entry point — consider removing.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Name of the public S3 bucket that is listed when looking up orbit files
# (presumably hosted by ASF, going by the constant's name — confirm).
ASF_BUCKET_NAME = "s1-orbits"


@cache
def list_public_bucket(bucket_name: str, prefix: str = "") -> list[str]:
    """List all object keys in a public S3 bucket.

    Pages through the bucket's unauthenticated XML listing endpoint, passing
    the ``marker`` query parameter until a response is no longer flagged as
    truncated.

    Results are memoized per ``(bucket_name, prefix)`` by ``functools.cache``:
    repeated calls return the *same* list object, so callers should treat the
    result as read-only.

    Parameters
    ----------
    bucket_name : str
        Name of the S3 bucket.
    prefix : str, optional
        Prefix to filter objects, by default "".

    Returns
    -------
    list[str]
        Object keys in the bucket, in the order the listing returned them.

    Raises
    ------
    requests.RequestException
        If there's an error in the HTTP request (including a timeout or a
        non-2xx status code).
    xml.etree.ElementTree.ParseError
        If a response body is not well-formed XML.
    """
    ns = "{http://s3.amazonaws.com/doc/2006-03-01/}"
    endpoint = f"https://{bucket_name}.s3.amazonaws.com"
    # Without a timeout `requests` may block forever on a stalled connection.
    timeout_seconds = 30
    marker: Optional[str] = None
    keys: list[str] = []

    while True:
        params = {"prefix": prefix}
        if marker:
            params["marker"] = marker

        try:
            response = requests.get(endpoint, params=params, timeout=timeout_seconds)
            response.raise_for_status()
        except requests.RequestException as e:
            logger.error("Error fetching bucket contents: %s", e)
            raise

        root = ET.fromstring(response.content)

        page_keys: list[str] = []
        for contents in root.findall(f"{ns}Contents"):
            key = contents.find(f"{ns}Key")
            # Compare against None explicitly: an Element with no child
            # elements is falsy, so `if key:` would skip every <Key>.
            if key is not None:
                page_keys.append(key.text or "")
                logger.debug("Found key: %s", key.text)
        keys.extend(page_keys)

        is_truncated = root.findtext(f"{ns}IsTruncated") or ""
        if is_truncated.lower() != "true":
            break

        # Prefer the server-provided NextMarker; when the response has none,
        # continue from the last key seen on this page.
        next_marker = root.find(f"{ns}NextMarker")
        if next_marker is not None:
            marker = next_marker.text
        elif page_keys:
            marker = page_keys[-1]
        else:
            marker = None

        if not marker:
            # Requesting again without a marker would restart the listing from
            # the first page and loop forever, so stop with what we have.
            logger.warning(
                "Bucket listing is truncated but no continuation marker was found"
            )
            break

    return keys


def get_orbit_files(orbit_type: Literal["precise", "restituted"]) -> list[str]:
    """Return the bucket keys belonging to one orbit product type.

    Parameters
    ----------
    orbit_type : Literal["precise", "restituted"]
        Which orbit product to list. "precise" selects keys starting with
        ``AUX_POEORB``; "restituted" selects keys starting with ``AUX_RESORB``.

    Returns
    -------
    list[str]
        Keys from the bucket named by ``ASF_BUCKET_NAME`` that start with
        the selected prefix.

    Raises
    ------
    ValueError
        If `orbit_type` is neither "precise" nor "restituted".
    """
    # Resolve the key prefix first so an invalid argument fails before any
    # bucket listing is attempted.
    if orbit_type == "precise":
        key_prefix = "AUX_POEORB"
    elif orbit_type == "restituted":
        key_prefix = "AUX_RESORB"
    else:
        raise ValueError("orbit_type must be either 'precise' or 'restituted'")

    # The full bucket listing is requested (no prefix) and filtered locally.
    matching = []
    for name in list_public_bucket(ASF_BUCKET_NAME):
        if name.startswith(key_prefix):
            matching.append(name)

    logger.info(f"Found {len(matching)} {orbit_type} orbit files")
    return matching
20 changes: 20 additions & 0 deletions eof/tests/test_asf_s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pytest
from eof._fetch_s3_bucket import get_orbit_files


@pytest.mark.vcr()
def test_get_orbit_files():
    """Check that each orbit type yields a non-empty, correctly prefixed list.

    The test carries pytest's ``vcr`` marker (presumably so HTTP responses are
    replayed from a recorded cassette — confirm against the plugin setup).
    """
    poe_keys = get_orbit_files("precise")
    res_keys = get_orbit_files("restituted")

    # Both listings must contain at least one entry...
    assert poe_keys, "No precise orbit files found"
    assert res_keys, "No restituted orbit files found"

    # ...and every entry must carry the prefix of the type that was requested.
    assert not [
        name for name in poe_keys if not name.startswith("AUX_POEORB")
    ], "Invalid precise orbit file name"
    assert not [
        name for name in res_keys if not name.startswith("AUX_RESORB")
    ], "Invalid restituted orbit file name"

0 comments on commit a3473cf

Please sign in to comment.