Merge pull request #43 from MotionbyLearning/ruff_formatting
Ruff formatting
rogerkuou authored Nov 8, 2023
2 parents 9326c15 + cb6f963 commit a2de500
Showing 10 changed files with 112 additions and 80 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/lint.yml
@@ -0,0 +1,8 @@
name: Ruff lint
on: [push, pull_request]
jobs:
ruff:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: chartboost/ruff-action@v1
1 change: 1 addition & 0 deletions docs/CONTRIBUTING.md
@@ -38,6 +38,7 @@ to see if someone already filed the same issue;
- make sure the existing tests still work. First, install the development dependencies as `pip install .[dev]`, and then run `pytest tests`;
- add your own tests (if necessary);
- update or expand the documentation. Make sure the documentation is built successfully: first, install documentation dependencies as `pip install .[docs]` and then run `mkdocs build`;
- make sure the linting checks pass by running `ruff check .` in the project root directory;
- [push](http://rogerdudler.github.io/git-guide/) your feature branch to (your fork of) the SARXarray repository on GitHub;
- create the pull request, e.g. following the instructions [here](https://help.github.com/articles/creating-a-pull-request/).

28 changes: 27 additions & 1 deletion pyproject.toml
@@ -5,6 +5,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "sarxarray"
version = "0.1.0"
requires-python = ">=3.10"
dependencies = [
"dask[complete]",
"xarray",
@@ -44,6 +45,7 @@ dev = [
"pytest",
"pytest-cov",
"pycodestyle",
"ruff",
]
docs = [
"mkdocs",
@@ -69,4 +71,28 @@ testpaths = [

[tool.coverage.run]
branch = true
source = ["sarxarray"]

[tool.ruff]
select = [
"E", # pycodestyle
"F", # pyflakes
"B", # flake8-bugbear
"D", # pydocstyle
"I", # isort
"N", # PEP8-naming
"UP", # pyupgrade (upgrade syntax to current syntax)
"PLE", # Pylint error https://github.com/charliermarsh/ruff#error-ple
]
ignore = [
"D100", "D101", "D104", "D105", "D106", "D107", "D203", "D213"
] # docstring style

line-length = 88
exclude = ["docs", "build"]
# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
target-version = "py310"

[tool.ruff.per-file-ignores]
"tests/**" = ["D"]
5 changes: 4 additions & 1 deletion sarxarray/__init__.py
@@ -1,2 +1,5 @@
from sarxarray import stack
from sarxarray._io import from_binary
from sarxarray.utils import multi_look, complex_coherence
from sarxarray.utils import complex_coherence, multi_look

__all__ = ("stack", "from_binary", "multi_look", "complex_coherence")
64 changes: 29 additions & 35 deletions sarxarray/_io.py
@@ -1,11 +1,10 @@
import logging
import math

import dask
import dask.array as da
import numpy as np
import xarray as xr
import dask.array as da
import math
import sarxarray.stack

from .conf import _dtypes, _memsize_chunk_mb

@@ -17,8 +16,7 @@
def from_binary(
slc_files, shape, vlabel="complex", dtype=np.complex64, chunks=None, ratio=1
):
"""
Read an SLC stack or related variables from binary files
"""Read an SLC stack or related variables from binary files.
Parameters
----------
@@ -41,16 +39,15 @@ def from_binary(
An xarray.Dataset with three dimensions: (azimuth, range, time).
"""

# Check dtype
if not np.dtype(dtype).isbuiltin:
if not all([name in (("re", "im")) for name in dtype.names]):
raise TypeError(
(
"The custom dtype should have only two field names: "
'"re" and "im". For example: '
'dtype = np.dtype([("re", np.float32), ("im", np.float32)]).'
)
)

# Initialize stack as a Dataset
@@ -59,7 +56,7 @@
"range": range(shape[1]),
"time": range(len(slc_files)),
}
stack = xr.Dataset(coords=coords)
ds_stack = xr.Dataset(coords=coords)

# Calculate appropriate chunk size if not user-defined
if chunks is None:
@@ -83,33 +80,30 @@
meta_arr = np.array((), dtype=_dtypes["complex"])
slcs = da.apply_gufunc(_unpack_complex, "()->()", slcs, meta=meta_arr)

stack = stack.assign({vlabel: (("azimuth", "range", "time"), slcs)})
ds_stack = ds_stack.assign({vlabel: (("azimuth", "range", "time"), slcs)})

# If reading complex data, automatically compute amplitude and phase
if vlabel == "complex":
stack = stack.slcstack._get_amplitude()
stack = stack.slcstack._get_phase()
ds_stack = ds_stack.slcstack._get_amplitude()
ds_stack = ds_stack.slcstack._get_phase()

return stack
return ds_stack
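For orientation, a minimal usage sketch of `from_binary` as documented above; the file names and shape are hypothetical:

```python
import numpy as np

import sarxarray

# Hypothetical SLC files and raster shape (azimuth, range).
slc_files = ["slc_20230101.raw", "slc_20230113.raw"]

# Built-in complex dtype: amplitude and phase are derived automatically.
stack = sarxarray.from_binary(slc_files, shape=(2000, 4000), dtype=np.complex64)

# Custom dtype: must expose exactly the "re" and "im" fields checked above.
dtype_ri = np.dtype([("re", np.float32), ("im", np.float32)])
stack_ri = sarxarray.from_binary(slc_files, shape=(2000, 4000), dtype=dtype_ri)
```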


def _mmap_dask_array(filename, shape, dtype, chunks):
"""
Create a Dask array from raw binary data in :code:`filename`
by memory mapping.
"""Create a Dask array from raw binary data by memory mapping.
This method is particularly effective if the file is already
in the file system cache and if arbitrary smaller subsets are
to be extracted from the Dask array without optimizing its
chunking scheme.
It may perform poorly on Windows if the file is not in the file
system cache. On Linux it performs well under most circumstances.
Parameters
----------
filename : str
The path to the file that contains raw binary data.
shape : tuple
Total shape of the data in the file
dtype:
@@ -119,7 +113,6 @@ def _mmap_dask_array(filename, shape, dtype, chunks):
Returns
-------
dask.array.Array
Dask array matching :code:`shape` and :code:`dtype`, backed by
memory-mapped chunks.
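The function body is collapsed in this view. As a loose sketch of the pattern the docstring describes (delayed `np.memmap` reads assembled into one lazy array), chunking only the azimuth axis for brevity, and not the file's actual code:

```python
import dask
import dask.array as da


def _mmap_dask_array_sketch(filename, shape, dtype, chunks):
    # One delayed memory-mapped read per azimuth chunk, using the
    # _mmap_load_chunk helper defined below; the real implementation
    # also slices the range axis via sl2.
    load = dask.delayed(_mmap_load_chunk)
    blocks = []
    for start in range(0, shape[0], chunks[0]):
        sl1 = slice(start, min(start + chunks[0], shape[0]))
        blocks.append(
            da.from_delayed(
                load(filename, shape, dtype, sl1, slice(None)),
                shape=(sl1.stop - sl1.start, shape[1]),
                dtype=dtype,
            )
        )
    return da.concatenate(blocks, axis=0)
```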
@@ -151,28 +144,31 @@ def _mmap_dask_array(filename, shape, dtype, chunks):


def _mmap_load_chunk(filename, shape, dtype, sl1, sl2):
"""
Memory map the given file with overall shape and dtype and return a slice
specified by :code:`sl`.
"""Memory map the given file with overall shape and dtype.
It returns a slice specified by :code:`sl1` in the azimuth direction and
:code:`sl2` in the range direction.
Parameters
----------
filename : str
The path to the file that contains raw binary data.
shape : tuple
Total shape of the data in the file
dtype:
NumPy dtype of the data in the file
sl:
Object that can be used for indexing or slicing a NumPy array to
extract a chunk
sl1:
Slice object in azimuth direction that can be used for indexing or
slicing a NumPy array to extract a chunk
sl2:
Slice object in range direction that can be used for indexing or slicing
a NumPy array to extract a chunk
Returns
-------
numpy.memmap or numpy.ndarray
View into memory map created by indexing with :code:`sl`,
or NumPy ndarray in case no view can be created using :code:`sl`.
View into memory map created by indexing with :code:`sl1` and
:code:`sl2`, or NumPy ndarray in case no view can be created.
"""
data = np.memmap(filename, mode="r", shape=shape, dtype=dtype)
return data[sl1, sl2]
@@ -183,13 +179,13 @@ def _unpack_complex(complex):


def _calc_chunksize(shape: tuple, dtype: np.dtype, ratio: int):
"""
Calculate an optimal chunking size in the azimuth and range direction for
reading with dask and store it in variable `chunks`
"""Calculate an optimal chunking size.
It calculates an optimal chunking size in the azimuth and range direction
for reading with dask and stores it in the variable `chunks`.
Parameters
----------
shape : tuple
Total shape of the data in the file
dtype:
@@ -199,12 +195,10 @@ def _calc_chunksize(shape: tuple, dtype: np.dtype, ratio: int):
Returns
-------
chunks: tuple
Chunk sizes (as multiples of 1000) in the azimuth and range direction.
Default value of [-1, -1] when unmodified activates this function.
"""

n_elements = (
_memsize_chunk_mb * 1024 * 1024 / np.dtype(dtype).itemsize
) # Optimal number of elements for a memory size of 100mb (first number)
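The remainder of the function is collapsed here. Roughly, the arithmetic behind that comment works out as follows, assuming square-ish chunks rounded down to multiples of 1000 (the exact elided logic may differ):

```python
import numpy as np

_memsize_chunk_mb = 100  # imported from sarxarray.conf

# Elements that fit in ~100 MB for complex64 (8 bytes per element):
n_elements = _memsize_chunk_mb * 1024 * 1024 / np.dtype(np.complex64).itemsize
# n_elements == 13_107_200

# A square-ish chunk, rounded down to a multiple of 1000:
side = int(np.sqrt(n_elements) // 1000) * 1000  # sqrt is ~3620, so 3000
chunks = (side, side)  # (3000, 3000)
```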
32 changes: 17 additions & 15 deletions sarxarray/stack.py
@@ -1,7 +1,7 @@
import sarxarray
import dask.array as da
import numpy as np
import xarray as xr
import dask.array as da

from .conf import _dtypes
from .utils import multi_look

@@ -30,16 +30,17 @@ def _get_phase(self):
return self._obj

def mrm(self):
"""Compute a Mean Reflection Map (MRM)."""
t_order = list(self._obj.dims.keys()).index("time") # Time dimension order
return self._obj.amplitude.mean(axis=t_order)

def point_selection(self, threshold, method="amplitude_dispersion", chunks=1000):
"""
Select pixels from a Stack, and return a Space-Time Matrix.
"""Select pixels from a Stack, and return a Space-Time Matrix.
The selection method is defined by `method` and `threshold`.
The selected pixels will be reshaped to (space, time), where `space` is the number of selected pixels.
The unselected pixels will be discarded. The original `azimuth` and `range` coordinates will be persisted.
The selected pixels will be reshaped to (space, time), where `space` is
the number of selected pixels. The unselected pixels will be discarded.
The original `azimuth` and `range` coordinates will be persisted.
Parameters
----------
@@ -55,11 +56,10 @@ def point_selection(self, threshold, method="amplitude_dispersion", chunks=1000)
xarray.Dataset
An xarray.Dataset with two dimensions: (space, time).
"""

match method:
case "amplitude_dispersion":
mask = self._amp_disp() < threshold
case other:
case _:
raise NotImplementedError

# Get the 1D index on space dimension
@@ -68,7 +68,8 @@
)
index = mask_1d.space.data[mask_1d.data] # Evaluate the mask

# Reshape from Stack ("azimuth", "range", "time") to Space-Time Matrix ("space", "time")
# Reshape from Stack ("azimuth", "range", "time") to Space-Time Matrix
# ("space", "time")
stacked = self._obj.stack(space=("azimuth", "range"))
stm = stacked.drop_vars(
["space", "azimuth", "range"]
@@ -83,13 +84,15 @@ def point_selection(self, threshold, method="amplitude_dispersion", chunks=1000)
# Apply selection
stm_masked = stm.sel(space=index)

# Re-order the dimensions to community preferred ("space", "time") order
# Since there are dask arrays in stm_masked, this operation is lazy.
# Re-order the dimensions to
# community preferred ("space", "time") order
# Since there are dask arrays in stm_masked,
# this operation is lazy.
# Therefore its effect can be observed after evaluation
stm_masked = stm_masked.transpose("space", "time")

# Rechunk
# Rechunk is needed because after applying masking, the chunk size will be inconsistent
# Rechunk is needed because after applying masking,
# the chunk size will be inconsistent
stm_masked = stm_masked.chunk(
{
"space": chunks,
@@ -117,8 +120,7 @@ def _amp_disp(self, chunk_azimuth=500, chunk_range=500):
def multi_look(
self, window_size, method="coarsen", statistics="mean", compute=True
):
"""
Perform multi-looking on a Stack, and return a Stack.
"""Perform multi-looking on a Stack, and return a Stack.
Parameters
----------
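Taken together, a hedged usage sketch of the accessor methods this file touches (the accessor is registered as `.slcstack`; the files, threshold, and window size are illustrative):

```python
import numpy as np

import sarxarray

stack = sarxarray.from_binary(
    ["slc_1.raw", "slc_2.raw"], shape=(2000, 4000), dtype=np.complex64
)

# Mean Reflection Map: temporal mean of the amplitude (lazy).
mrm = stack.slcstack.mrm()

# Space-Time Matrix: keep pixels whose amplitude dispersion is below the
# threshold, reshaped to ("space", "time"); azimuth/range coordinates persist.
stm = stack.slcstack.point_selection(threshold=0.45, method="amplitude_dispersion")

# Spatial multi-looking with a (2, 4) window in (azimuth, range).
looked = stack.slcstack.multi_look((2, 4), method="coarsen", statistics="mean")
```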
11 changes: 4 additions & 7 deletions sarxarray/utils.py
@@ -1,12 +1,10 @@
import sarxarray
import numpy as np
import xarray as xr
from dask.delayed import delayed, Delayed
from dask.delayed import Delayed, delayed


def multi_look(data, window_size, method="coarsen", statistics="mean", compute=True):
"""
Perform multi-looking on a Stack, and return a Stack.
"""Perform multi-looking on a Stack, and return a Stack.
Parameters
----------
@@ -85,8 +83,7 @@ def _custom_coord_func(reshaped, axis):
def complex_coherence(
reference: xr.DataArray, other: xr.DataArray, window_size, compute=True
):
"""
Calculate complex coherence of two images.
"""Calculate complex coherence of two images.
Assume two images reference (R) and other (O), the complex coherence is
defined as:
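The equation itself is collapsed in this view; the standard complex-coherence estimator it presumably reproduces is

$$\gamma = \frac{\mathbb{E}\left[R\,O^{*}\right]}{\sqrt{\mathbb{E}\left[|R|^{2}\right]\,\mathbb{E}\left[|O|^{2}\right]}}$$

where $O^{*}$ is the complex conjugate of $O$ and the expectations are evaluated as spatial averages over `window_size`.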
@@ -158,7 +155,7 @@ def _compute_coherence(numerator, denominator):

def _validate_multi_look_inputs(data, window_size, method, statistics):
# check if data is xarray
if not isinstance(data, (xr.Dataset, xr.DataArray)):
if not isinstance(data, xr.Dataset | xr.DataArray):
raise TypeError("The data must be an xarray.Dataset or xarray.DataArray.")

# check if azimuth, range are in the dimensions
5 changes: 3 additions & 2 deletions tests/test_io.py
@@ -1,10 +1,11 @@
"""test _io.py
"""

import numpy as np
import pytest

import sarxarray
import numpy as np
from sarxarray._io import _unpack_complex, _calc_chunksize
from sarxarray._io import _calc_chunksize, _unpack_complex


@pytest.fixture()
3 changes: 1 addition & 2 deletions tests/test_stack.py
@@ -1,8 +1,7 @@
"""test stack.py
"""
import pytest
import sarxarray
import numpy as np
import pytest
import xarray as xr
from dask.delayed import Delayed
