Merge pull request #48 from scottstanie/dataspace-migration
Dataspace migration
scottstanie authored Oct 31, 2023
2 parents cbe6df9 + 68b593c commit 611865b
Showing 29 changed files with 79,516 additions and 450 deletions.
12 changes: 11 additions & 1 deletion .github/workflows/ci.yml
@@ -25,7 +25,17 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install requests click python-dateutil sentinelsat>=1.0 pytest
python -m pip install requests click python-dateutil pytest pytest-recording
python -m pip install .
- name: Setup Dummy ~/.netrc file
run: |
echo "machine urs.earthdata.nasa.gov" >> ~/.netrc
echo " login asdf" >> ~/.netrc
echo " password asdf" >> ~/.netrc
echo "machine dataspace.copernicus.eu" >> ~/.netrc
echo " login asdf" >> ~/.netrc
echo " password asdf" >> ~/.netrc
chmod 600 ~/.netrc
- name: Test with pytest
run: |
python -m pytest -v --doctest-modules --ignore=eof/__main__.py
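The dummy `~/.netrc` entries above let the test suite exercise the credential lookup without real accounts. As context, here is a minimal sketch (not part of the diff) of how such entries are read back with Python's standard-library `netrc` module, the same mechanism the new `eof/_auth.py` relies on; the hostname mirrors the CI step:

```python
# Minimal sketch: read back a dummy entry written by the CI step above.
import netrc
from pathlib import Path

netrc_path = Path("~/.netrc").expanduser()
auth = netrc.netrc(netrc_path).authenticators("dataspace.copernicus.eu")
if auth is not None:
    login, _account, password = auth
    print(f"Found Dataspace credentials for {login}")
```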
35 changes: 35 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,35 @@
ci:
autofix_prs: false

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: "v4.4.0"
hooks:
# https://github.com/pre-commit/pre-commit-hooks/issues/718
# - id: check-added-large-files # Fails with git v1.8.3
- id: check-case-conflict
- id: check-merge-conflict
- id: check-yaml
args: [--allow-multiple-documents]
- id: debug-statements
- id: end-of-file-fixer
- id: file-contents-sorter
files: (requirements.txt)$
- id: mixed-line-ending
- id: trailing-whitespace

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.2
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
types_or: [python, jupyter]
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: "v1.4.1"
hooks:
- id: mypy
additional_dependencies:
- types-requests
- types-python-dateutil
1 change: 0 additions & 1 deletion LICENSE.txt
@@ -19,4 +19,3 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

4 changes: 2 additions & 2 deletions README.md
@@ -1,4 +1,4 @@
[![Build Status](https://travis-ci.org/scottstanie/sentineleof.svg?branch=master)](https://travis-ci.org/scottstanie/sentineleof)
[![Build Status](https://travis-ci.org/scottstanie/sentineleof.svg?branch=master)](https://travis-ci.org/scottstanie/sentineleof)

# Sentinel EOF

@@ -17,7 +17,7 @@ or through conda:
conda install -c conda-forge sentineleof
```

This will put the executable `eof` on your path
This will put the executable `eof` on your path


If you have a bunch of Sentinel 1 zip files (or unzipped SAFE folders), you can simply run
7 changes: 5 additions & 2 deletions eof/__init__.py
@@ -1,2 +1,5 @@
from . import download
from . import parsing
import importlib.metadata

from . import download, parsing # noqa

__version__ = importlib.metadata.version("sentineleof")
2 changes: 1 addition & 1 deletion eof/__main__.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

import sys

from eof.cli import cli

sys.exit(cli())
83 changes: 83 additions & 0 deletions eof/_auth.py
@@ -0,0 +1,83 @@
from __future__ import annotations

import getpass
import netrc
import os
from pathlib import Path

from ._types import Filename

NASA_HOST = "urs.earthdata.nasa.gov"
DATASPACE_HOST = "dataspace.copernicus.eu"


def setup_netrc(netrc_file: Filename = "~/.netrc", host: str = NASA_HOST):
"""Prompt user for NASA/Dataspace username/password, store as attribute of ~/.netrc."""
netrc_file = Path(netrc_file).expanduser()
try:
n = netrc.netrc(netrc_file)
has_correct_permission = _file_is_0600(netrc_file)
if not has_correct_permission:
# User has a netrc file, but it's not set up correctly
print(
"Your ~/.netrc file does not have the correct"
" permissions.\n*Changing permissions to 0600*"
" (read/write for user only).",
)
os.chmod(netrc_file, 0o600)
# Check that an entry for the host exists, with both a username and password
_has_existing_entry = (
host in n.hosts
and n.authenticators(host)[0] # type: ignore
and n.authenticators(host)[2] # type: ignore
)
if _has_existing_entry:
return
except FileNotFoundError:
# User doesn't have a netrc file, make one
print("No ~/.netrc file found, creating one.")
Path(netrc_file).write_text("")
n = netrc.netrc(netrc_file)

username, password = _get_username_pass(host)
# Add account to netrc file
n.hosts[host] = (username, None, password)
print(f"Saving credentials to {netrc_file} (machine={host}).")
with open(netrc_file, "w") as f:
f.write(str(n))
# Set permissions to 0600 (read/write for user only)
# https://www.ibm.com/docs/en/aix/7.1?topic=formats-netrc-file-format-tcpip
os.chmod(netrc_file, 0o600)


def _file_is_0600(filename: Filename):
"""Check that a file has 0600 permissions (read/write for user only)."""
return oct(Path(filename).stat().st_mode)[-4:] == "0600"


def get_netrc_credentials(host: str) -> tuple[str, str]:
"""Get username and password from netrc file for a given host."""
n = netrc.netrc()
auth = n.authenticators(host)
if auth is None:
raise ValueError(f"No username/password found for {host} in ~/.netrc")
username, _, password = auth
if username is None or password is None:
raise ValueError(f"No username/password found for {host} in ~/.netrc")
return username, password


def _get_username_pass(host: str):
"""If netrc is not set up, get username/password via command line input."""
if host == NASA_HOST:
from .asf_client import SIGNUP_URL as signup_url
elif host == DATASPACE_HOST:
from .dataspace_client import SIGNUP_URL as signup_url

print(f"Please enter credentials for {host} to download data.")
print(f"See the {signup_url} for signup info")

username = input("Username: ")

password = getpass.getpass("Password (will not be displayed): ")
return username, password
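As a usage sketch, the two public helpers here can be combined to make sure a Copernicus Dataspace login is available before any download; `DATASPACE_HOST`, `setup_netrc`, and `get_netrc_credentials` come from the diff above, while the surrounding script is illustrative only:

```python
# Illustrative only: prompt for (or reuse) Dataspace credentials in ~/.netrc,
# then read them back for use in an authenticated requests session.
from eof._auth import DATASPACE_HOST, get_netrc_credentials, setup_netrc

setup_netrc(host=DATASPACE_HOST)  # no-op if a valid entry already exists
username, password = get_netrc_credentials(DATASPACE_HOST)
print(f"Dataspace user: {username}")
```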
44 changes: 44 additions & 0 deletions eof/_select_orbit.py
@@ -0,0 +1,44 @@
"""Module for filtering/selecting from orbit query"""
from __future__ import annotations

import operator
from datetime import datetime, timedelta
from typing import Sequence

from .products import SentinelOrbit

T_ORBIT = (12 * 86400.0) / 175.0
"""Orbital period of Sentinel-1 in seconds"""


class OrbitSelectionError(RuntimeError):
pass


class ValidityError(ValueError):
pass


def last_valid_orbit(
t0: datetime,
t1: datetime,
data: Sequence[SentinelOrbit],
margin0=timedelta(seconds=T_ORBIT + 60),
margin1=timedelta(minutes=5),
) -> str:
# Using a start margin of > 1 orbit so that the start of the orbit file will
# cover the ascending node crossing of the acquisition
candidates = [
item
for item in data
if item.start_time <= (t0 - margin0) and item.stop_time >= (t1 + margin1)
]
if not candidates:
raise ValidityError(
"none of the input products completely covers the requested "
"time interval: [t0={}, t1={}]".format(t0, t1)
)

candidates.sort(key=operator.attrgetter("created_time"), reverse=True)

return candidates[0].filename
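`T_ORBIT` works out to (12 * 86400) / 175 ≈ 5924.6 seconds (about 98.7 minutes), so the default start margin of one orbit plus 60 seconds forces the selected file to begin well before the acquisition and therefore cover the preceding ascending node crossing. A minimal sketch of calling `last_valid_orbit` follows; the filenames use the real EOF naming convention but are arbitrary examples, and it assumes `SentinelOrbit` (from `eof.products`, not shown in this diff) parses its validity and creation times from the filename, as its use in `asf_client.py` suggests:

```python
# Illustrative sketch: choose the most recently generated precise orbit file
# that fully covers an acquisition time (treated here as an instant, t0 == t1).
from datetime import datetime

from eof._select_orbit import last_valid_orbit
from eof.products import SentinelOrbit

candidates = [
    SentinelOrbit("S1A_OPER_AUX_POEORB_OPOD_20230103T080750_V20221213T225942_20221215T005942.EOF"),
    SentinelOrbit("S1A_OPER_AUX_POEORB_OPOD_20230104T080750_V20221214T225942_20221216T005942.EOF"),
]
acq_time = datetime(2022, 12, 15, 4, 30)
# Only the second file covers [acq_time - margin0, acq_time + margin1]
print(last_valid_orbit(acq_time, acq_time, candidates))
```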
17 changes: 17 additions & 0 deletions eof/_types.py
@@ -0,0 +1,17 @@
from __future__ import annotations

from os import PathLike
from typing import TYPE_CHECKING, Tuple, Union

# Some classes are declared as generic in stubs, but not at runtime.
# In Python 3.9 and earlier, os.PathLike is not subscriptable, results in a runtime error
# https://stackoverflow.com/questions/71077499/typeerror-abcmeta-object-is-not-subscriptable
if TYPE_CHECKING:
PathLikeStr = PathLike[str]
else:
PathLikeStr = PathLike

Filename = Union[str, PathLikeStr]

# left, bottom, right, top
Bbox = Tuple[float, float, float, float]
150 changes: 150 additions & 0 deletions eof/asf_client.py
@@ -0,0 +1,150 @@
"""Client to get orbit files from ASF."""
from __future__ import annotations

import os
from datetime import timedelta
from typing import Optional

import requests

from ._auth import NASA_HOST, setup_netrc
from ._select_orbit import T_ORBIT, ValidityError, last_valid_orbit
from ._types import Filename
from .log import logger
from .parsing import EOFLinkFinder
from .products import SentinelOrbit

SIGNUP_URL = "https://urs.earthdata.nasa.gov/users/new"
"""Url to prompt user to sign up for NASA Earthdata account."""


class ASFClient:
precise_url = "https://s1qc.asf.alaska.edu/aux_poeorb/"
res_url = "https://s1qc.asf.alaska.edu/aux_resorb/"
urls = {"precise": precise_url, "restituted": res_url}
eof_lists = {"precise": None, "restituted": None}

def __init__(self, cache_dir: Optional[Filename] = None):
setup_netrc(host=NASA_HOST)
self._cache_dir = cache_dir

def get_full_eof_list(self, orbit_type="precise", max_dt=None):
"""Get the list of orbit files from the ASF server."""
if orbit_type not in self.urls.keys():
raise ValueError("Unknown orbit type: {}".format(orbit_type))

if self.eof_lists.get(orbit_type) is not None:
return self.eof_lists[orbit_type]
# Try to see if we have the list of EOFs in the cache
elif os.path.exists(self._get_filename_cache_path(orbit_type)):
eof_list = self._get_cached_filenames(orbit_type)
# Need to clear it if it's older than what we're looking for
max_saved = max([e.start_time for e in eof_list])
if max_saved < max_dt:
logger.warning("Clearing cached {} EOF list:".format(orbit_type))
logger.warning(
"{} is older than requested {}".format(max_saved, max_dt)
)
self._clear_cache(orbit_type)
else:
logger.info("Using cached EOF list")
self.eof_lists[orbit_type] = eof_list
return eof_list

logger.info("Downloading all filenames from ASF (may take awhile)")
resp = requests.get(self.urls.get(orbit_type))
finder = EOFLinkFinder()
finder.feed(resp.text)
eof_list = [SentinelOrbit(f) for f in finder.eof_links]
self.eof_lists[orbit_type] = eof_list
self._write_cached_filenames(orbit_type, eof_list)
return eof_list

def get_download_urls(self, orbit_dts, missions, orbit_type="precise"):
"""Find the URL for an orbit file covering the specified datetime
Args:
dt (datetime): requested
Args:
orbit_dts (list[str] or list[datetime]): datetime for orbit coverage
missions (list[str]): specify S1A or S1B
Returns:
str: URL for the orbit file
"""
eof_list = self.get_full_eof_list(orbit_type=orbit_type, max_dt=max(orbit_dts))
# Split up for quicker parsing of the latest one
mission_to_eof_list = {
"S1A": [eof for eof in eof_list if eof.mission == "S1A"],
"S1B": [eof for eof in eof_list if eof.mission == "S1B"],
}
# For precise orbits, we can have a larger front margin to ensure we
# cover the ascending node crossing
if orbit_type == "precise":
margin0 = timedelta(seconds=T_ORBIT + 60)
else:
margin0 = timedelta(seconds=60)

remaining_orbits = []
urls = []
for dt, mission in zip(orbit_dts, missions):
try:
filename = last_valid_orbit(
dt, dt, mission_to_eof_list[mission], margin0=margin0
)
urls.append(self.urls[orbit_type] + filename)
except ValidityError:
remaining_orbits.append((dt, mission))

if remaining_orbits:
logger.warning("The following dates were not found: %s", remaining_orbits)
if orbit_type == "precise":
logger.warning(
"Attempting to download the restituted orbits for these dates."
)
remaining_dts, remaining_missions = zip(*remaining_orbits)
urls.extend(
self.get_download_urls(
remaining_dts, remaining_missions, orbit_type="restituted"
)
)

return urls

def _get_cached_filenames(self, orbit_type="precise"):
"""Get the cache path for the ASF orbit files."""
filepath = self._get_filename_cache_path(orbit_type)
logger.debug(f"ASF file path cache: {filepath = }")
if os.path.exists(filepath):
with open(filepath, "r") as f:
return [SentinelOrbit(f) for f in f.read().splitlines()]
return None

def _write_cached_filenames(self, orbit_type="precise", eof_list=[]):
"""Cache the ASF orbit files."""
filepath = self._get_filename_cache_path(orbit_type)
with open(filepath, "w") as f:
for e in eof_list:
f.write(e.filename + "\n")

def _clear_cache(self, orbit_type="precise"):
"""Clear the cache for the ASF orbit files."""
filepath = self._get_filename_cache_path(orbit_type)
os.remove(filepath)

def _get_filename_cache_path(self, orbit_type="precise"):
fname = "{}_filenames.txt".format(orbit_type.lower())
return os.path.join(self.get_cache_dir(), fname)

def get_cache_dir(self):
"""Find location of directory to store .hgt downloads
Assuming linux, uses ~/.cache/sentineleof/
"""
if self._cache_dir is not None:
return self._cache_dir
path = os.getenv("XDG_CACHE_HOME", os.path.expanduser("~/.cache"))
path = os.path.join(path, "sentineleof") # Make subfolder for our downloads
logger.debug("Cache path: %s", path)
if not os.path.exists(path):
os.makedirs(path)
return path
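As a hedged usage sketch (the datetimes and mission list are invented; the class, method, and signature come from the diff above), the client can be asked for orbit-file URLs directly. Note that constructing `ASFClient` calls `setup_netrc` for `urs.earthdata.nasa.gov`, so an entry for that host should already exist in `~/.netrc`:

```python
# Illustrative only: look up precise-orbit URLs for two S1A acquisitions.
from datetime import datetime

from eof.asf_client import ASFClient

client = ASFClient()
urls = client.get_download_urls(
    orbit_dts=[datetime(2022, 12, 15), datetime(2022, 12, 16)],
    missions=["S1A", "S1A"],
    orbit_type="precise",
)
print(urls)
```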