Merge pull request #43 from MotionbyLearning/ruff_formatting
Ruff formatting
rogerkuou authored Nov 8, 2023
2 parents 9326c15 + cb6f963 commit a2de500
Showing 10 changed files with 112 additions and 80 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/lint.yml
@@ -0,0 +1,8 @@
name: Ruff lint
on: [push, pull_request]
jobs:
ruff:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: chartboost/ruff-action@v1
1 change: 1 addition & 0 deletions docs/CONTRIBUTING.md
@@ -38,6 +38,7 @@ to see if someone already filed the same issue;
- make sure the existing tests still work. First, install the development dependencies as `pip install .[dev]`, and then run `pytest tests`;
- add your own tests (if necessary);
- update or expand the documentation. Make sure the documentation is built successfully: first, install documentation dependencies as `pip install .[docs]` and then run `mkdocs build`;
- make sure the linting checks pass by running `ruff check .` in the project root directory;
- [push](http://rogerdudler.github.io/git-guide/) your feature branch to (your fork of) the SARXarray repository on GitHub;
- create the pull request, e.g. following the instructions [here](https://help.github.com/articles/creating-a-pull-request/).

28 changes: 27 additions & 1 deletion pyproject.toml
@@ -5,6 +5,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "sarxarray"
version = "0.1.0"
requires-python = ">=3.10"
dependencies = [
"dask[complete]",
"xarray",
@@ -44,6 +45,7 @@ dev = [
"pytest",
"pytest-cov",
"pycodestyle",
"ruff",
]
docs = [
"mkdocs",
@@ -69,4 +71,28 @@ testpaths = [

[tool.coverage.run]
branch = true
source = ["sarxarray"]

[tool.ruff]
select = [
"E", # pycodestyle
"F", # pyflakes
"B", # flake8-bugbear
"D", # pydocstyle
"I", # isort
"N", # PEP8-naming
"UP", # pyupgrade (upgrade syntax to current syntax)
"PLE", # Pylint error https://github.com/charliermarsh/ruff#error-ple
]
ignore = [
"D100", "D101", "D104", "D105", "D106", "D107", "D203", "D213"
] # docstring style

line-length = 88
exclude = ["docs", "build"]
# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
target-version = "py310"

[tool.ruff.per-file-ignores]
"tests/**" = ["D"]
5 changes: 4 additions & 1 deletion sarxarray/__init__.py
@@ -1,2 +1,5 @@
from sarxarray import stack
from sarxarray._io import from_binary
from sarxarray.utils import multi_look, complex_coherence
from sarxarray.utils import complex_coherence, multi_look

__all__ = ("stack", "from_binary", "multi_look", "complex_coherence")
64 changes: 29 additions & 35 deletions sarxarray/_io.py
@@ -1,11 +1,10 @@
import logging
import math

import dask
import dask.array as da
import numpy as np
import xarray as xr
import dask.array as da
import math
import sarxarray.stack

from .conf import _dtypes, _memsize_chunk_mb

@@ -17,8 +16,7 @@
def from_binary(
slc_files, shape, vlabel="complex", dtype=np.complex64, chunks=None, ratio=1
):
"""
Read an SLC stack or related variables from binary files
"""Read an SLC stack or related variables from binary files.
Parameters
----------
@@ -41,16 +39,15 @@ def from_binary(
An xarray.Dataset with three dimensions: (azimuth, range, time).
"""

# Check dtype
if not np.dtype(dtype).isbuiltin:
if not all([name in (("re", "im")) for name in dtype.names]):
raise TypeError(
(
"The custom dtype should have only two field names: "
'"re" and "im". For example: '
'dtype = np.dtype([("re", np.float32), ("im", np.float32)]).'
)
)

# Initialize stack as a Dataset
@@ -59,7 +56,7 @@
"range": range(shape[1]),
"time": range(len(slc_files)),
}
stack = xr.Dataset(coords=coords)
ds_stack = xr.Dataset(coords=coords)

# Calculate appropriate chunk size if not user-defined
if chunks is None:
@@ -83,33 +80,30 @@
meta_arr = np.array((), dtype=_dtypes["complex"])
slcs = da.apply_gufunc(_unpack_complex, "()->()", slcs, meta=meta_arr)

stack = stack.assign({vlabel: (("azimuth", "range", "time"), slcs)})
ds_stack = ds_stack.assign({vlabel: (("azimuth", "range", "time"), slcs)})

# If reading complex data, automatically compute amplitude and phase
if vlabel == "complex":
stack = stack.slcstack._get_amplitude()
stack = stack.slcstack._get_phase()
ds_stack = ds_stack.slcstack._get_amplitude()
ds_stack = ds_stack.slcstack._get_phase()

return stack
return ds_stack
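For orientation, a minimal usage sketch of `from_binary` as documented above; the file names and shape are hypothetical:

```python
import numpy as np

import sarxarray

# Hypothetical SLC files and raster shape (azimuth, range).
slc_files = ["slc_20230101.raw", "slc_20230113.raw"]

# Built-in complex dtype: amplitude and phase are derived automatically.
stack = sarxarray.from_binary(slc_files, shape=(2000, 4000), dtype=np.complex64)

# Custom dtype: must expose exactly the "re" and "im" fields checked above.
dtype_ri = np.dtype([("re", np.float32), ("im", np.float32)])
stack_ri = sarxarray.from_binary(slc_files, shape=(2000, 4000), dtype=dtype_ri)
```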


def _mmap_dask_array(filename, shape, dtype, chunks):
"""
Create a Dask array from raw binary data in :code:`filename`
by memory mapping.
"""Create a Dask array from raw binary data by memory mapping.
This method is particularly effective if the file is already
in the file system cache and if arbitrary smaller subsets are
to be extracted from the Dask array without optimizing its
chunking scheme.
It may perform poorly on Windows if the file is not in the file
system cache. On Linux it performs well under most circumstances.
Parameters
----------
filename : str
The path to the file that contains raw binary data.
shape : tuple
Total shape of the data in the file
dtype:
@@ -119,7 +113,6 @@ def _mmap_dask_array(filename, shape, dtype, chunks):
Returns
-------
dask.array.Array
Dask array matching :code:`shape` and :code:`dtype`, backed by
memory-mapped chunks.
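The function body is collapsed in this view. As a loose sketch of the pattern the docstring describes (delayed `np.memmap` reads assembled into one lazy array), chunking only the azimuth axis for brevity, and not the file's actual code:

```python
import dask
import dask.array as da


def _mmap_dask_array_sketch(filename, shape, dtype, chunks):
    # One delayed memory-mapped read per azimuth chunk, using the
    # _mmap_load_chunk helper defined below; the real implementation
    # also slices the range axis via sl2.
    load = dask.delayed(_mmap_load_chunk)
    blocks = []
    for start in range(0, shape[0], chunks[0]):
        sl1 = slice(start, min(start + chunks[0], shape[0]))
        blocks.append(
            da.from_delayed(
                load(filename, shape, dtype, sl1, slice(None)),
                shape=(sl1.stop - sl1.start, shape[1]),
                dtype=dtype,
            )
        )
    return da.concatenate(blocks, axis=0)
```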
@@ -151,28 +144,31 @@ def _mmap_dask_array(filename, shape, dtype, chunks):


def _mmap_load_chunk(filename, shape, dtype, sl1, sl2):
"""
Memory map the given file with overall shape and dtype and return a slice
specified by :code:`sl`.
"""Memory map the given file with overall shape and dtype.
It returns a slice specified by :code:`sl1` in the azimuth direction and
:code:`sl2` in the range direction.
Parameters
----------
filename : str
The path to the file that contains raw binary data.
shape : tuple
Total shape of the data in the file
dtype:
NumPy dtype of the data in the file
sl:
Object that can be used for indexing or slicing a NumPy array to
extract a chunk
sl1:
Slice object in azimuth direction that can be used for indexing or
slicing a NumPy array to extract a chunk
sl2:
Slice object in range direction that can be used for indexing or slicing
a NumPy array to extract a chunk
Returns
-------
numpy.memmap or numpy.ndarray
View into memory map created by indexing with :code:`sl`,
or NumPy ndarray in case no view can be created using :code:`sl`.
View into memory map created by indexing with :code:`sl1` and
:code:`sl2`, or NumPy ndarray in case no view can be created.
"""
data = np.memmap(filename, mode="r", shape=shape, dtype=dtype)
return data[sl1, sl2]
@@ -183,13 +179,13 @@ def _unpack_complex(complex):


def _calc_chunksize(shape: tuple, dtype: np.dtype, ratio: int):
"""
Calculate an optimal chunking size in the azimuth and range direction for
reading with dask and store it in variable `chunks`
"""Calculate an optimal chunking size.
It calculates an optimal chunking size in the azimuth and range direction
for reading with dask and stores it in the variable `chunks`.
Parameters
----------
shape : tuple
Total shape of the data in the file
dtype:
@@ -199,12 +195,10 @@ def _calc_chunksize(shape: tuple, dtype: np.dtype, ratio: int):
Returns
-------
chunks: tuple
Chunk sizes (as multiples of 1000) in the azimuth and range direction.
Default value of [-1, -1] when unmodified activates this function.
"""

n_elements = (
_memsize_chunk_mb * 1024 * 1024 / np.dtype(dtype).itemsize
) # Optimal number of elements for a memory size of 100mb (first number)
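The remainder of the function is collapsed here. Roughly, the arithmetic behind that comment works out as follows, assuming square-ish chunks rounded down to multiples of 1000 (the exact elided logic may differ):

```python
import numpy as np

_memsize_chunk_mb = 100  # imported from sarxarray.conf

# Elements that fit in ~100 MB for complex64 (8 bytes per element):
n_elements = _memsize_chunk_mb * 1024 * 1024 / np.dtype(np.complex64).itemsize
# n_elements == 13_107_200

# A square-ish chunk, rounded down to a multiple of 1000:
side = int(np.sqrt(n_elements) // 1000) * 1000  # sqrt is ~3620, so 3000
chunks = (side, side)  # (3000, 3000)
```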
32 changes: 17 additions & 15 deletions sarxarray/stack.py
@@ -1,7 +1,7 @@
import sarxarray
import dask.array as da
import numpy as np
import xarray as xr
import dask.array as da

from .conf import _dtypes
from .utils import multi_look

@@ -30,16 +30,17 @@ def _get_phase(self):
return self._obj

def mrm(self):
"""Compute a Mean Reflection Map (MRM)."""
t_order = list(self._obj.dims.keys()).index("time") # Time dimension order
return self._obj.amplitude.mean(axis=t_order)

def point_selection(self, threshold, method="amplitude_dispersion", chunks=1000):
"""
Select pixels from a Stack, and return a Space-Time Matrix.
"""Select pixels from a Stack, and return a Space-Time Matrix.
The selection method is defined by `method` and `threshold`.
The selected pixels will be reshaped to (space, time), where `space` is the number of selected pixels.
The unselected pixels will be discarded. The original `azimuth` and `range` coordinates will be persisted.
The selected pixels will be reshaped to (space, time), where `space` is
the number of selected pixels. The unselected pixels will be discarded.
The original `azimuth` and `range` coordinates will be persisted.
Parameters
----------
@@ -55,11 +56,10 @@ def point_selection(self, threshold, method="amplitude_dispersion", chunks=1000)
xarray.Dataset
An xarray.Dataset with two dimensions: (space, time).
"""

match method:
case "amplitude_dispersion":
mask = self._amp_disp() < threshold
case other:
case _:
raise NotImplementedError

# Get the 1D index on space dimension
@@ -68,7 +68,8 @@
)
index = mask_1d.space.data[mask_1d.data] # Evaluate the mask

# Reshape from Stack ("azimuth", "range", "time") to Space-Time Matrix ("space", "time")
# Reshape from Stack ("azimuth", "range", "time") to Space-Time Matrix
# ("space", "time")
stacked = self._obj.stack(space=("azimuth", "range"))
stm = stacked.drop_vars(
["space", "azimuth", "range"]
@@ -83,13 +84,15 @@ def point_selection(self, threshold, method="amplitude_dispersion", chunks=1000)
# Apply selection
stm_masked = stm.sel(space=index)

# Re-order the dimensions to community preferred ("space", "time") order
# Since there are dask arrays in stm_masked, this operation is lazy.
# Re-order the dimensions to
# community preferred ("space", "time") order
# Since there are dask arrays in stm_masked,
# this operation is lazy.
# Therefore its effect can be observed after evaluation
stm_masked = stm_masked.transpose("space", "time")

# Rechunk
# Rechunk is needed because after applying masking, the chunk size will be inconsistent
# Rechunk is needed because after applying masking,
# the chunk size will be inconsistent
stm_masked = stm_masked.chunk(
{
"space": chunks,
@@ -117,8 +120,7 @@ def _amp_disp(self, chunk_azimuth=500, chunk_range=500):
def multi_look(
self, window_size, method="coarsen", statistics="mean", compute=True
):
"""
Perform multi-looking on a Stack, and return a Stack.
"""Perform multi-looking on a Stack, and return a Stack.
Parameters
----------
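Taken together, a hedged usage sketch of the accessor methods this file touches (the accessor is registered as `.slcstack`; the files, threshold, and window size are illustrative):

```python
import numpy as np

import sarxarray

stack = sarxarray.from_binary(
    ["slc_1.raw", "slc_2.raw"], shape=(2000, 4000), dtype=np.complex64
)

# Mean Reflection Map: temporal mean of the amplitude (lazy).
mrm = stack.slcstack.mrm()

# Space-Time Matrix: keep pixels whose amplitude dispersion is below the
# threshold, reshaped to ("space", "time"); azimuth/range coordinates persist.
stm = stack.slcstack.point_selection(threshold=0.45, method="amplitude_dispersion")

# Spatial multi-looking with a (2, 4) window in (azimuth, range).
looked = stack.slcstack.multi_look((2, 4), method="coarsen", statistics="mean")
```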
11 changes: 4 additions & 7 deletions sarxarray/utils.py
@@ -1,12 +1,10 @@
import sarxarray
import numpy as np
import xarray as xr
from dask.delayed import delayed, Delayed
from dask.delayed import Delayed, delayed


def multi_look(data, window_size, method="coarsen", statistics="mean", compute=True):
"""
Perform multi-looking on a Stack, and return a Stack.
"""Perform multi-looking on a Stack, and return a Stack.
Parameters
----------
@@ -85,8 +83,7 @@ def _custom_coord_func(reshaped, axis):
def complex_coherence(
reference: xr.DataArray, other: xr.DataArray, window_size, compute=True
):
"""
Calculate complex coherence of two images.
"""Calculate complex coherence of two images.
Assume two images reference (R) and other (O), the complex coherence is
defined as:
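The equation itself is collapsed in this view; the standard complex-coherence estimator it presumably reproduces is

$$\gamma = \frac{\mathbb{E}\left[R\,O^{*}\right]}{\sqrt{\mathbb{E}\left[|R|^{2}\right]\,\mathbb{E}\left[|O|^{2}\right]}}$$

where $O^{*}$ is the complex conjugate of $O$ and the expectations are evaluated as spatial averages over `window_size`.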
@@ -158,7 +155,7 @@ def _compute_coherence(numerator, denominator):

def _validate_multi_look_inputs(data, window_size, method, statistics):
# check if data is xarray
if not isinstance(data, (xr.Dataset, xr.DataArray)):
if not isinstance(data, xr.Dataset | xr.DataArray):
raise TypeError("The data must be an xarray.Dataset or xarray.DataArray.")

# check if azimuth, range are in the dimensions
5 changes: 3 additions & 2 deletions tests/test_io.py
@@ -1,10 +1,11 @@
"""test _io.py
"""

import numpy as np
import pytest

import sarxarray
import numpy as np
from sarxarray._io import _unpack_complex, _calc_chunksize
from sarxarray._io import _calc_chunksize, _unpack_complex


@pytest.fixture()
3 changes: 1 addition & 2 deletions tests/test_stack.py
@@ -1,8 +1,7 @@
"""test stack.py
"""
import pytest
import sarxarray
import numpy as np
import pytest
import xarray as xr
from dask.delayed import Delayed
