Merge pull request #48 from scottstanie/dataspace-migration
Dataspace migration
scottstanie authored Oct 31, 2023
2 parents cbe6df9 + 68b593c commit 611865b
Showing 29 changed files with 79,516 additions and 450 deletions.
12 changes: 11 additions & 1 deletion .github/workflows/ci.yml
@@ -25,7 +25,17 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install requests click python-dateutil sentinelsat>=1.0 pytest
python -m pip install requests click python-dateutil pytest pytest-recording
python -m pip install .
- name: Setup Dummy ~/.netrc file
run: |
echo "machine urs.earthdata.nasa.gov" >> ~/.netrc
echo " login asdf" >> ~/.netrc
echo " password asdf" >> ~/.netrc
echo "machine dataspace.copernicus.eu" >> ~/.netrc
echo " login asdf" >> ~/.netrc
echo " password asdf" >> ~/.netrc
chmod 600 ~/.netrc
- name: Test with pytest
run: |
python -m pytest -v --doctest-modules --ignore=eof/__main__.py
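The dummy `~/.netrc` entries above let the test suite exercise the credential lookup without real accounts. As context, here is a minimal sketch (not part of the diff) of how such entries are read back with Python's standard-library `netrc` module, the same mechanism the new `eof/_auth.py` relies on; the hostname mirrors the CI step:

```python
# Minimal sketch: read back a dummy entry written by the CI step above.
import netrc
from pathlib import Path

netrc_path = Path("~/.netrc").expanduser()
auth = netrc.netrc(netrc_path).authenticators("dataspace.copernicus.eu")
if auth is not None:
    login, _account, password = auth
    print(f"Found Dataspace credentials for {login}")
```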
35 changes: 35 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,35 @@
ci:
autofix_prs: false

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: "v4.4.0"
hooks:
# https://github.com/pre-commit/pre-commit-hooks/issues/718
# - id: check-added-large-files # Fails with git v1.8.3
- id: check-case-conflict
- id: check-merge-conflict
- id: check-yaml
args: [--allow-multiple-documents]
- id: debug-statements
- id: end-of-file-fixer
- id: file-contents-sorter
files: (requirements.txt)$
- id: mixed-line-ending
- id: trailing-whitespace

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.2
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
types_or: [python, jupyter]
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: "v1.4.1"
hooks:
- id: mypy
additional_dependencies:
- types-requests
- types-python-dateutil
1 change: 0 additions & 1 deletion LICENSE.txt
@@ -19,4 +19,3 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

4 changes: 2 additions & 2 deletions README.md
@@ -1,4 +1,4 @@
[![Build Status](https://travis-ci.org/scottstanie/sentineleof.svg?branch=master)](https://travis-ci.org/scottstanie/sentineleof)
[![Build Status](https://travis-ci.org/scottstanie/sentineleof.svg?branch=master)](https://travis-ci.org/scottstanie/sentineleof)

# Sentinel EOF

@@ -17,7 +17,7 @@ or through conda:
conda install -c conda-forge sentineleof
```

This will put the executable `eof` on your path
This will put the executable `eof` on your path


If you have a bunch of Sentinel 1 zip files (or unzipped SAFE folders), you can simply run
7 changes: 5 additions & 2 deletions eof/__init__.py
@@ -1,2 +1,5 @@
from . import download
from . import parsing
import importlib.metadata

from . import download, parsing # noqa

__version__ = importlib.metadata.version("sentineleof")
2 changes: 1 addition & 1 deletion eof/__main__.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

import sys

from eof.cli import cli

sys.exit(cli())
83 changes: 83 additions & 0 deletions eof/_auth.py
@@ -0,0 +1,83 @@
from __future__ import annotations

import getpass
import netrc
import os
from pathlib import Path

from ._types import Filename

NASA_HOST = "urs.earthdata.nasa.gov"
DATASPACE_HOST = "dataspace.copernicus.eu"


def setup_netrc(netrc_file: Filename = "~/.netrc", host: str = NASA_HOST):
"""Prompt user for NASA/Dataspace username/password, store as attribute of ~/.netrc."""
netrc_file = Path(netrc_file).expanduser()
try:
n = netrc.netrc(netrc_file)
has_correct_permission = _file_is_0600(netrc_file)
if not has_correct_permission:
# User has a netrc file, but it's not set up correctly
print(
"Your ~/.netrc file does not have the correct"
" permissions.\n*Changing permissions to 0600*"
" (read/write for user only).",
)
os.chmod(netrc_file, 0o600)
# Check that an entry for the host exists, with both a username and password
_has_existing_entry = (
host in n.hosts
and n.authenticators(host)[0] # type: ignore
and n.authenticators(host)[2] # type: ignore
)
if _has_existing_entry:
return
except FileNotFoundError:
# User doesn't have a netrc file, make one
print("No ~/.netrc file found, creating one.")
Path(netrc_file).write_text("")
n = netrc.netrc(netrc_file)

username, password = _get_username_pass(host)
# Add account to netrc file
n.hosts[host] = (username, None, password)
print(f"Saving credentials to {netrc_file} (machine={host}).")
with open(netrc_file, "w") as f:
f.write(str(n))
# Set permissions to 0600 (read/write for user only)
# https://www.ibm.com/docs/en/aix/7.1?topic=formats-netrc-file-format-tcpip
os.chmod(netrc_file, 0o600)


def _file_is_0600(filename: Filename):
"""Check that a file has 0600 permissions (read/write for user only)."""
return oct(Path(filename).stat().st_mode)[-4:] == "0600"


def get_netrc_credentials(host: str) -> tuple[str, str]:
"""Get username and password from netrc file for a given host."""
n = netrc.netrc()
auth = n.authenticators(host)
if auth is None:
raise ValueError(f"No username/password found for {host} in ~/.netrc")
username, _, password = auth
if username is None or password is None:
raise ValueError(f"No username/password found for {host} in ~/.netrc")
return username, password


def _get_username_pass(host: str):
"""If netrc is not set up, get username/password via command line input."""
if host == NASA_HOST:
from .asf_client import SIGNUP_URL as signup_url
elif host == DATASPACE_HOST:
from .dataspace_client import SIGNUP_URL as signup_url

print(f"Please enter credentials for {host} to download data.")
print(f"See the {signup_url} for signup info")

username = input("Username: ")

password = getpass.getpass("Password (will not be displayed): ")
return username, password
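As a usage sketch, the two public helpers here can be combined to make sure a Copernicus Dataspace login is available before any download; `DATASPACE_HOST`, `setup_netrc`, and `get_netrc_credentials` come from the diff above, while the surrounding script is illustrative only:

```python
# Illustrative only: prompt for (or reuse) Dataspace credentials in ~/.netrc,
# then read them back for use in an authenticated requests session.
from eof._auth import DATASPACE_HOST, get_netrc_credentials, setup_netrc

setup_netrc(host=DATASPACE_HOST)  # no-op if a valid entry already exists
username, password = get_netrc_credentials(DATASPACE_HOST)
print(f"Dataspace user: {username}")
```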
44 changes: 44 additions & 0 deletions eof/_select_orbit.py
@@ -0,0 +1,44 @@
"""Module for filtering/selecting from orbit query"""
from __future__ import annotations

import operator
from datetime import datetime, timedelta
from typing import Sequence

from .products import SentinelOrbit

T_ORBIT = (12 * 86400.0) / 175.0
"""Orbital period of Sentinel-1 in seconds"""


class OrbitSelectionError(RuntimeError):
pass


class ValidityError(ValueError):
pass


def last_valid_orbit(
t0: datetime,
t1: datetime,
data: Sequence[SentinelOrbit],
margin0=timedelta(seconds=T_ORBIT + 60),
margin1=timedelta(minutes=5),
) -> str:
# Using a start margin of > 1 orbit so that the start of the orbit file will
# cover the ascending node crossing of the acquisition
candidates = [
item
for item in data
if item.start_time <= (t0 - margin0) and item.stop_time >= (t1 + margin1)
]
if not candidates:
raise ValidityError(
"none of the input products completely covers the requested "
"time interval: [t0={}, t1={}]".format(t0, t1)
)

candidates.sort(key=operator.attrgetter("created_time"), reverse=True)

return candidates[0].filename
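`T_ORBIT` works out to (12 * 86400) / 175 ≈ 5924.6 seconds (about 98.7 minutes), so the default start margin of one orbit plus 60 seconds forces the selected file to begin well before the acquisition and therefore cover the preceding ascending node crossing. A minimal sketch of calling `last_valid_orbit` follows; the filenames use the real EOF naming convention but are arbitrary examples, and it assumes `SentinelOrbit` (from `eof.products`, not shown in this diff) parses its validity and creation times from the filename, as its use in `asf_client.py` suggests:

```python
# Illustrative sketch: choose the most recently generated precise orbit file
# that fully covers an acquisition time (treated here as an instant, t0 == t1).
from datetime import datetime

from eof._select_orbit import last_valid_orbit
from eof.products import SentinelOrbit

candidates = [
    SentinelOrbit("S1A_OPER_AUX_POEORB_OPOD_20230103T080750_V20221213T225942_20221215T005942.EOF"),
    SentinelOrbit("S1A_OPER_AUX_POEORB_OPOD_20230104T080750_V20221214T225942_20221216T005942.EOF"),
]
acq_time = datetime(2022, 12, 15, 4, 30)
# Only the second file covers [acq_time - margin0, acq_time + margin1]
print(last_valid_orbit(acq_time, acq_time, candidates))
```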
17 changes: 17 additions & 0 deletions eof/_types.py
@@ -0,0 +1,17 @@
from __future__ import annotations

from os import PathLike
from typing import TYPE_CHECKING, Tuple, Union

# Some classes are declared as generic in stubs, but not at runtime.
# In Python 3.9 and earlier, os.PathLike is not subscriptable, results in a runtime error
# https://stackoverflow.com/questions/71077499/typeerror-abcmeta-object-is-not-subscriptable
if TYPE_CHECKING:
PathLikeStr = PathLike[str]
else:
PathLikeStr = PathLike

Filename = Union[str, PathLikeStr]

# left, bottom, right, top
Bbox = Tuple[float, float, float, float]
150 changes: 150 additions & 0 deletions eof/asf_client.py
@@ -0,0 +1,150 @@
"""Client to get orbit files from ASF."""
from __future__ import annotations

import os
from datetime import timedelta
from typing import Optional

import requests

from ._auth import NASA_HOST, setup_netrc
from ._select_orbit import T_ORBIT, ValidityError, last_valid_orbit
from ._types import Filename
from .log import logger
from .parsing import EOFLinkFinder
from .products import SentinelOrbit

SIGNUP_URL = "https://urs.earthdata.nasa.gov/users/new"
"""Url to prompt user to sign up for NASA Earthdata account."""


class ASFClient:
precise_url = "https://s1qc.asf.alaska.edu/aux_poeorb/"
res_url = "https://s1qc.asf.alaska.edu/aux_resorb/"
urls = {"precise": precise_url, "restituted": res_url}
eof_lists = {"precise": None, "restituted": None}

def __init__(self, cache_dir: Optional[Filename] = None):
setup_netrc(host=NASA_HOST)
self._cache_dir = cache_dir

def get_full_eof_list(self, orbit_type="precise", max_dt=None):
"""Get the list of orbit files from the ASF server."""
if orbit_type not in self.urls.keys():
raise ValueError("Unknown orbit type: {}".format(orbit_type))

if self.eof_lists.get(orbit_type) is not None:
return self.eof_lists[orbit_type]
# Try to see if we have the list of EOFs in the cache
elif os.path.exists(self._get_filename_cache_path(orbit_type)):
eof_list = self._get_cached_filenames(orbit_type)
# Need to clear it if it's older than what we're looking for
max_saved = max([e.start_time for e in eof_list])
if max_saved < max_dt:
logger.warning("Clearing cached {} EOF list:".format(orbit_type))
logger.warning(
"{} is older than requested {}".format(max_saved, max_dt)
)
self._clear_cache(orbit_type)
else:
logger.info("Using cached EOF list")
self.eof_lists[orbit_type] = eof_list
return eof_list

logger.info("Downloading all filenames from ASF (may take awhile)")
resp = requests.get(self.urls.get(orbit_type))
finder = EOFLinkFinder()
finder.feed(resp.text)
eof_list = [SentinelOrbit(f) for f in finder.eof_links]
self.eof_lists[orbit_type] = eof_list
self._write_cached_filenames(orbit_type, eof_list)
return eof_list

def get_download_urls(self, orbit_dts, missions, orbit_type="precise"):
"""Find the URL for an orbit file covering the specified datetime
Args:
dt (datetime): requested
Args:
orbit_dts (list[str] or list[datetime]): datetime for orbit coverage
missions (list[str]): specify S1A or S1B
Returns:
str: URL for the orbit file
"""
eof_list = self.get_full_eof_list(orbit_type=orbit_type, max_dt=max(orbit_dts))
# Split up for quicker parsing of the latest one
mission_to_eof_list = {
"S1A": [eof for eof in eof_list if eof.mission == "S1A"],
"S1B": [eof for eof in eof_list if eof.mission == "S1B"],
}
# For precise orbits, we can have a larger front margin to ensure we
# cover the ascending node crossing
if orbit_type == "precise":
margin0 = timedelta(seconds=T_ORBIT + 60)
else:
margin0 = timedelta(seconds=60)

remaining_orbits = []
urls = []
for dt, mission in zip(orbit_dts, missions):
try:
filename = last_valid_orbit(
dt, dt, mission_to_eof_list[mission], margin0=margin0
)
urls.append(self.urls[orbit_type] + filename)
except ValidityError:
remaining_orbits.append((dt, mission))

if remaining_orbits:
logger.warning("The following dates were not found: %s", remaining_orbits)
if orbit_type == "precise":
logger.warning(
"Attempting to download the restituted orbits for these dates."
)
remaining_dts, remaining_missions = zip(*remaining_orbits)
urls.extend(
self.get_download_urls(
remaining_dts, remaining_missions, orbit_type="restituted"
)
)

return urls

def _get_cached_filenames(self, orbit_type="precise"):
"""Get the cache path for the ASF orbit files."""
filepath = self._get_filename_cache_path(orbit_type)
logger.debug(f"ASF file path cache: {filepath = }")
if os.path.exists(filepath):
with open(filepath, "r") as f:
return [SentinelOrbit(f) for f in f.read().splitlines()]
return None

def _write_cached_filenames(self, orbit_type="precise", eof_list=[]):
"""Cache the ASF orbit files."""
filepath = self._get_filename_cache_path(orbit_type)
with open(filepath, "w") as f:
for e in eof_list:
f.write(e.filename + "\n")

def _clear_cache(self, orbit_type="precise"):
"""Clear the cache for the ASF orbit files."""
filepath = self._get_filename_cache_path(orbit_type)
os.remove(filepath)

def _get_filename_cache_path(self, orbit_type="precise"):
fname = "{}_filenames.txt".format(orbit_type.lower())
return os.path.join(self.get_cache_dir(), fname)

def get_cache_dir(self):
"""Find location of directory to store .hgt downloads
Assuming linux, uses ~/.cache/sentineleof/
"""
if self._cache_dir is not None:
return self._cache_dir
path = os.getenv("XDG_CACHE_HOME", os.path.expanduser("~/.cache"))
path = os.path.join(path, "sentineleof") # Make subfolder for our downloads
logger.debug("Cache path: %s", path)
if not os.path.exists(path):
os.makedirs(path)
return path
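As a hedged usage sketch (the datetimes and mission list are invented; the class, method, and signature come from the diff above), the client can be asked for orbit-file URLs directly. Note that constructing `ASFClient` calls `setup_netrc` for `urs.earthdata.nasa.gov`, so an entry for that host should already exist in `~/.netrc`:

```python
# Illustrative only: look up precise-orbit URLs for two S1A acquisitions.
from datetime import datetime

from eof.asf_client import ASFClient

client = ASFClient()
urls = client.get_download_urls(
    orbit_dts=[datetime(2022, 12, 15), datetime(2022, 12, 16)],
    missions=["S1A", "S1A"],
    orbit_type="precise",
)
print(urls)
```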