-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #48 from scottstanie/dataspace-migration
Dataspace migration
- Loading branch information
Showing 29 changed files with 79,516 additions and 450 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# pre-commit configuration (https://pre-commit.com)
# Run locally with `pre-commit run --all-files`.
ci:
  # Do not let pre-commit.ci push autofix commits to PRs.
  autofix_prs: false

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: "v4.4.0"
    hooks:
      # https://github.com/pre-commit/pre-commit-hooks/issues/718
      # - id: check-added-large-files  # Fails with git v1.8.3
      - id: check-case-conflict
      - id: check-merge-conflict
      - id: check-yaml
        args: [--allow-multiple-documents]
      - id: debug-statements
      - id: end-of-file-fixer
      - id: file-contents-sorter
        files: (requirements.txt)$
      - id: mixed-line-ending
      - id: trailing-whitespace

  # Lint and format Python (and notebooks) with ruff.
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.1.2
    hooks:
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix]
        types_or: [python, jupyter]
      - id: ruff-format

  # Static type checking.
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: "v1.4.1"
    hooks:
      - id: mypy
        additional_dependencies:
          - types-requests
          - types-python-dateutil
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,5 @@ | ||
import importlib.metadata

# Re-exported submodules; `noqa` silences the unused-import lint.
from . import download, parsing  # noqa

# Package version, read from the installed distribution metadata.
__version__ = importlib.metadata.version("sentineleof")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
#!/usr/bin/env python3
"""Entry point for ``python -m eof``: run the CLI and exit with its status code."""

import sys

from eof.cli import cli

sys.exit(cli())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
from __future__ import annotations | ||
|
||
import getpass | ||
import netrc | ||
import os | ||
from pathlib import Path | ||
|
||
from ._types import Filename | ||
|
||
NASA_HOST = "urs.earthdata.nasa.gov" | ||
DATASPACE_HOST = "dataspace.copernicus.eu" | ||
|
||
|
||
def setup_netrc(netrc_file: Filename = "~/.netrc", host: str = NASA_HOST):
    """Prompt user for NASA/Dataspace username/password, store as attribute of ~/.netrc.

    If the file already has a complete entry for `host`, does nothing
    (after fixing the file permissions to 0600 if needed).

    Args:
        netrc_file: path to the netrc file (default ``~/.netrc``).
        host: machine name to check/create an entry for
            (``NASA_HOST`` or ``DATASPACE_HOST``).
    """
    netrc_file = Path(netrc_file).expanduser()
    try:
        n = netrc.netrc(netrc_file)
        if not _file_is_0600(netrc_file):
            # User has a netrc file, but it's not set up correctly
            print(
                "Your ~/.netrc file does not have the correct"
                " permissions.\n*Changing permissions to 0600*"
                " (read/write for user only).",
            )
            os.chmod(netrc_file, 0o600)
        # Check account exists, as well as having username and password
        auth = n.authenticators(host)
        if host in n.hosts and auth and auth[0] and auth[2]:
            return
    except FileNotFoundError:
        # User doesn't have a netrc file, make one
        print("No ~/.netrc file found, creating one.")
        netrc_file.write_text("")
        n = netrc.netrc(netrc_file)

    username, password = _get_username_pass(host)
    # Add account to netrc file
    n.hosts[host] = (username, None, password)
    print(f"Saving credentials to {netrc_file} (machine={host}).")
    # Set permissions to 0600 (read/write for user only) *before* writing the
    # secret, so the credentials are never world-readable, even briefly.
    # https://www.ibm.com/docs/en/aix/7.1?topic=formats-netrc-file-format-tcpip
    os.chmod(netrc_file, 0o600)
    with open(netrc_file, "w") as f:
        f.write(str(n))
|
||
|
||
def _file_is_0600(filename: Filename): | ||
"""Check that a file has 0600 permissions (read/write for user only).""" | ||
return oct(Path(filename).stat().st_mode)[-4:] == "0600" | ||
|
||
|
||
def get_netrc_credentials(host: str) -> tuple[str, str]:
    """Look up the (username, password) pair for `host` in the user's netrc file.

    Raises:
        ValueError: if `host` has no entry, or the entry lacks a
            username or password.
    """
    entry = netrc.netrc().authenticators(host)
    # A missing entry and an incomplete entry get the same error message.
    username, _, password = entry if entry is not None else (None, None, None)
    if username is None or password is None:
        raise ValueError(f"No username/password found for {host} in ~/.netrc")
    return username, password
|
||
|
||
def _get_username_pass(host: str):
    """If netrc is not set up, get username/password via command line input.

    Args:
        host: credential host, either ``NASA_HOST`` or ``DATASPACE_HOST``.

    Returns:
        tuple[str, str]: (username, password) entered by the user.

    Raises:
        ValueError: if `host` is not a recognized credential host.
    """
    if host == NASA_HOST:
        from .asf_client import SIGNUP_URL as signup_url
    elif host == DATASPACE_HOST:
        from .dataspace_client import SIGNUP_URL as signup_url
    else:
        # Previously an unrecognized host fell through both branches and
        # crashed with a NameError on `signup_url`; fail clearly instead.
        raise ValueError(f"Unknown host: {host}")

    print(f"Please enter credentials for {host} to download data.")
    print(f"See the {signup_url} for signup info")

    username = input("Username: ")

    password = getpass.getpass("Password (will not be displayed): ")
    return username, password
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
"""Module for filtering/selecting from orbit query""" | ||
from __future__ import annotations | ||
|
||
import operator | ||
from datetime import datetime, timedelta | ||
from typing import Sequence | ||
|
||
from .products import SentinelOrbit | ||
|
||
T_ORBIT = (12 * 86400.0) / 175.0
"""Orbital period of Sentinel-1 in seconds"""


class OrbitSelectionError(RuntimeError):
    """Raised when an orbit file cannot be selected."""


class ValidityError(ValueError):
    """Raised when no product fully covers the requested time interval."""


def last_valid_orbit(
    t0: datetime,
    t1: datetime,
    data: Sequence[SentinelOrbit],
    margin0=timedelta(seconds=T_ORBIT + 60),
    margin1=timedelta(minutes=5),
) -> str:
    """Pick the most recently created orbit product covering [t0, t1].

    The start margin defaults to a bit more than one orbital period so the
    selected file also covers the ascending node crossing of the acquisition.

    Raises:
        ValidityError: if no product covers the padded interval.
    """
    window_start = t0 - margin0
    window_end = t1 + margin1
    covering = []
    for product in data:
        if product.start_time <= window_start and product.stop_time >= window_end:
            covering.append(product)
    if not covering:
        raise ValidityError(
            "none of the input products completely covers the requested "
            "time interval: [t0={}, t1={}]".format(t0, t1)
        )

    # Most recently generated product wins.
    newest = max(covering, key=operator.attrgetter("created_time"))
    return newest.filename
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from __future__ import annotations

from os import PathLike
from typing import TYPE_CHECKING, Tuple, Union

# Some classes are declared as generic in stubs, but not at runtime.
# In Python 3.9 and earlier, os.PathLike is not subscriptable, results in a runtime error
# https://stackoverflow.com/questions/71077499/typeerror-abcmeta-object-is-not-subscriptable
if TYPE_CHECKING:
    PathLikeStr = PathLike[str]
else:
    PathLikeStr = PathLike

# Anything accepted as a filesystem path argument.
Filename = Union[str, PathLikeStr]

# left, bottom, right, top
Bbox = Tuple[float, float, float, float]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
"""Client to get orbit files from ASF.""" | ||
from __future__ import annotations | ||
|
||
import os | ||
from datetime import timedelta | ||
from typing import Optional | ||
|
||
import requests | ||
|
||
from ._auth import NASA_HOST, setup_netrc | ||
from ._select_orbit import T_ORBIT, ValidityError, last_valid_orbit | ||
from ._types import Filename | ||
from .log import logger | ||
from .parsing import EOFLinkFinder | ||
from .products import SentinelOrbit | ||
|
||
SIGNUP_URL = "https://urs.earthdata.nasa.gov/users/new" | ||
"""Url to prompt user to sign up for NASA Earthdata account.""" | ||
|
||
|
||
class ASFClient:
    """Client to query and build download URLs for Sentinel-1 orbit files at ASF.

    Orbit-file listings are scraped from the ASF aux_poeorb/aux_resorb pages
    and cached both in memory (class-level, shared across instances) and on
    disk under the cache directory.
    """

    precise_url = "https://s1qc.asf.alaska.edu/aux_poeorb/"
    res_url = "https://s1qc.asf.alaska.edu/aux_resorb/"
    urls = {"precise": precise_url, "restituted": res_url}
    # In-memory cache of parsed listings; class-level so it is shared
    # across all ASFClient instances in the process.
    eof_lists = {"precise": None, "restituted": None}

    def __init__(self, cache_dir: Optional[Filename] = None):
        # Ensure NASA Earthdata credentials exist before any download attempt.
        setup_netrc(host=NASA_HOST)
        self._cache_dir = cache_dir

    def get_full_eof_list(self, orbit_type="precise", max_dt=None):
        """Get the list of orbit files from the ASF server.

        Args:
            orbit_type (str): "precise" or "restituted"
            max_dt (datetime, optional): if given, a cached listing whose
                newest entry is older than this is discarded and re-fetched.

        Returns:
            list[SentinelOrbit]: all orbit files available for `orbit_type`
        """
        if orbit_type not in self.urls:
            raise ValueError("Unknown orbit type: {}".format(orbit_type))

        if self.eof_lists.get(orbit_type) is not None:
            return self.eof_lists[orbit_type]
        # Try to see if we have the list of EOFs in the on-disk cache
        if os.path.exists(self._get_filename_cache_path(orbit_type)):
            eof_list = self._get_cached_filenames(orbit_type)
            # An empty cache file is treated as a cache miss.
            if eof_list:
                max_saved = max(e.start_time for e in eof_list)
                # Clear the cache if it's older than what we're looking for.
                # (When max_dt is None there is nothing to compare against:
                # the old `max_saved < max_dt` raised TypeError in that case.)
                if max_dt is not None and max_saved < max_dt:
                    logger.warning("Clearing cached {} EOF list:".format(orbit_type))
                    logger.warning(
                        "{} is older than requested {}".format(max_saved, max_dt)
                    )
                    self._clear_cache(orbit_type)
                else:
                    logger.info("Using cached EOF list")
                    self.eof_lists[orbit_type] = eof_list
                    return eof_list

        logger.info("Downloading all filenames from ASF (may take awhile)")
        resp = requests.get(self.urls.get(orbit_type))
        finder = EOFLinkFinder()
        finder.feed(resp.text)
        eof_list = [SentinelOrbit(f) for f in finder.eof_links]
        self.eof_lists[orbit_type] = eof_list
        self._write_cached_filenames(orbit_type, eof_list)
        return eof_list

    def get_download_urls(self, orbit_dts, missions, orbit_type="precise"):
        """Find the URLs for orbit files covering the specified datetimes.

        Args:
            orbit_dts (list[str] or list[datetime]): datetimes for orbit coverage
            missions (list[str]): parallel list specifying S1A or S1B
            orbit_type (str): "precise" or "restituted"

        Returns:
            list[str]: URLs for the orbit files. Dates with no precise orbit
                fall back to a restituted-orbit search.
        """
        eof_list = self.get_full_eof_list(orbit_type=orbit_type, max_dt=max(orbit_dts))
        # Split up per mission for quicker parsing of the latest one
        mission_to_eof_list = {
            "S1A": [eof for eof in eof_list if eof.mission == "S1A"],
            "S1B": [eof for eof in eof_list if eof.mission == "S1B"],
        }
        # For precise orbits, we can have a larger front margin to ensure we
        # cover the ascending node crossing
        if orbit_type == "precise":
            margin0 = timedelta(seconds=T_ORBIT + 60)
        else:
            margin0 = timedelta(seconds=60)

        remaining_orbits = []
        urls = []
        for dt, mission in zip(orbit_dts, missions):
            try:
                filename = last_valid_orbit(
                    dt, dt, mission_to_eof_list[mission], margin0=margin0
                )
                urls.append(self.urls[orbit_type] + filename)
            except ValidityError:
                remaining_orbits.append((dt, mission))

        if remaining_orbits:
            logger.warning("The following dates were not found: %s", remaining_orbits)
            if orbit_type == "precise":
                logger.warning(
                    "Attempting to download the restituted orbits for these dates."
                )
                remaining_dts, remaining_missions = zip(*remaining_orbits)
                urls.extend(
                    self.get_download_urls(
                        remaining_dts, remaining_missions, orbit_type="restituted"
                    )
                )

        return urls

    def _get_cached_filenames(self, orbit_type="precise"):
        """Read the cached orbit filenames, or return None if no cache file exists."""
        filepath = self._get_filename_cache_path(orbit_type)
        logger.debug(f"ASF file path cache: {filepath = }")
        if os.path.exists(filepath):
            with open(filepath, "r") as f:
                # Note: don't shadow the file handle with the loop variable
                return [SentinelOrbit(line) for line in f.read().splitlines()]
        return None

    def _write_cached_filenames(self, orbit_type="precise", eof_list=()):
        """Write the filenames from `eof_list` to the on-disk cache."""
        # Default is an (immutable) empty tuple, not a mutable `[]` default.
        filepath = self._get_filename_cache_path(orbit_type)
        with open(filepath, "w") as f:
            for e in eof_list:
                f.write(e.filename + "\n")

    def _clear_cache(self, orbit_type="precise"):
        """Delete the on-disk filename cache for `orbit_type`."""
        filepath = self._get_filename_cache_path(orbit_type)
        os.remove(filepath)

    def _get_filename_cache_path(self, orbit_type="precise"):
        """Return the path of the cache file for `orbit_type` filenames."""
        fname = "{}_filenames.txt".format(orbit_type.lower())
        return os.path.join(self.get_cache_dir(), fname)

    def get_cache_dir(self):
        """Find location of directory to store cached orbit-filename listings.

        Uses the `cache_dir` given at construction if any; otherwise
        $XDG_CACHE_HOME/sentineleof (defaulting to ~/.cache/sentineleof),
        creating the directory if needed.
        """
        if self._cache_dir is not None:
            return self._cache_dir
        path = os.getenv("XDG_CACHE_HOME", os.path.expanduser("~/.cache"))
        path = os.path.join(path, "sentineleof")  # Make subfolder for our downloads
        logger.debug("Cache path: %s", path)
        os.makedirs(path, exist_ok=True)
        return path
Oops, something went wrong.