diff --git a/httomolibgpu/__init__.py b/httomolibgpu/__init__.py index ccb48651..e2181454 100644 --- a/httomolibgpu/__init__.py +++ b/httomolibgpu/__init__.py @@ -3,12 +3,12 @@ from httomolibgpu.misc.rescale import rescale_to_int from httomolibgpu.prep.alignment import distortion_correction_proj_discorpy from httomolibgpu.prep.normalize import normalize -#from httomolibgpu.prep.phase import paganin_filter_savu, paganin_filter_tomopy +from httomolibgpu.prep.phase import paganin_filter_savu, paganin_filter_tomopy from httomolibgpu.prep.stripe import ( remove_stripe_based_sorting, remove_stripe_ti, remove_all_stripe, ) + from httomolibgpu.recon.algorithm import FBP, SIRT, CGLS -#from httomolibgpu.recon.rotation import find_center_vo, find_center_360, find_center_pc -#from httomolibgpu.recon.rotation import find_center_vo +from httomolibgpu.recon.rotation import find_center_vo, find_center_360, find_center_pc diff --git a/httomolibgpu/cupywrapper.py b/httomolibgpu/cupywrapper.py index a5b858cf..f3cccf61 100644 --- a/httomolibgpu/cupywrapper.py +++ b/httomolibgpu/cupywrapper.py @@ -1,11 +1,19 @@ cupy_run = False try: import cupy as cp + import nvtx + try: cp.cuda.Device(0).compute_capability cupy_run = True except cp.cuda.runtime.CUDARuntimeError: print("CuPy library is a major dependency for HTTomolibgpu, please install") import numpy as cp -except ImportError: - import numpy as cp \ No newline at end of file +except ImportError as e: + print( + f"Failed to import module in {__file__} with error: {e}; defaulting to CPU-only mode" + ) + from unittest.mock import Mock + import numpy as cp + + nvtx = Mock() diff --git a/httomolibgpu/misc/corr.py b/httomolibgpu/misc/corr.py index 28da4b67..49fe1521 100644 --- a/httomolibgpu/misc/corr.py +++ b/httomolibgpu/misc/corr.py @@ -22,17 +22,18 @@ import numpy as np from httomolibgpu import cupywrapper + cp = cupywrapper.cp +nvtx = cupywrapper.nvtx from numpy import float32 -import nvtx __all__ = [ "median_filter", "remove_outlier", ] -@nvtx.annotate() + def median_filter( data: cp.ndarray, kernel_size: int = 3, @@ -71,6 +72,7 @@ def median_filter( return data +@nvtx.annotate() def __median_filter( data: cp.ndarray, kernel_size: int = 3, @@ -157,7 +159,7 @@ def __median_filter( thresholding_kernel(data, float32(dif), output) return output -@nvtx.annotate() + def remove_outlier( data: cp.ndarray, kernel_size: int = 3, axis: int = 0, dif: float = 0.1 ) -> cp.ndarray: diff --git a/httomolibgpu/misc/morph.py b/httomolibgpu/misc/morph.py index 0a9c0b94..098d1712 100644 --- a/httomolibgpu/misc/morph.py +++ b/httomolibgpu/misc/morph.py @@ -22,9 +22,10 @@ import numpy as np from httomolibgpu import cupywrapper + cp = cupywrapper.cp -import nvtx +nvtx = cupywrapper.nvtx from typing import Literal __all__ = [ @@ -32,7 +33,7 @@ "data_resampler", ] -@nvtx.annotate() + def sino_360_to_180( data: cp.ndarray, overlap: int = 0, rotation: Literal["left", "right"] = "left" ) -> cp.ndarray: @@ -62,6 +63,7 @@ def sino_360_to_180( return data +@nvtx.annotate() def __sino_360_to_180( data: cp.ndarray, overlap: int = 0, rotation: Literal["left", "right"] = "left" ) -> cp.ndarray: @@ -103,7 +105,6 @@ def __sino_360_to_180( return out -@nvtx.annotate() def data_resampler( data: cp.ndarray, newshape: list, axis: int = 1, interpolation: str = "linear" ) -> cp.ndarray: @@ -123,17 +124,19 @@ def data_resampler( Returns: cp.ndarray: Up/Down-scaled 3D cupy array - """ + """ if cupywrapper.cupy_run: return __data_resampler(data, newshape, axis, interpolation) else: print("data_resampler 
won't be executed because CuPy is not installed") return data + +@nvtx.annotate() def __data_resampler( data: cp.ndarray, newshape: list, axis: int = 1, interpolation: str = "linear" ) -> cp.ndarray: - + from cupyx.scipy.interpolate import interpn if data.ndim != 3: diff --git a/httomolibgpu/misc/rescale.py b/httomolibgpu/misc/rescale.py index 506db058..3cdb25f1 100644 --- a/httomolibgpu/misc/rescale.py +++ b/httomolibgpu/misc/rescale.py @@ -21,15 +21,17 @@ import numpy as np from httomolibgpu import cupywrapper + cp = cupywrapper.cp -import nvtx +nvtx = cupywrapper.nvtx from typing import Literal, Optional, Tuple, Union __all__ = [ "rescale_to_int", ] -@nvtx.annotate() + + def rescale_to_int( data: cp.ndarray, perc_range_min: float = 0.0, @@ -72,8 +74,10 @@ def rescale_to_int( return __rescale_to_int(data, perc_range_min, perc_range_max, bits, glob_stats) else: print("rescale_to_int won't be executed because CuPy is not installed") - return data + return data + +@nvtx.annotate() def __rescale_to_int( data: cp.ndarray, perc_range_min: float = 0.0, diff --git a/httomolibgpu/prep/alignment.py b/httomolibgpu/prep/alignment.py index 484488db..480db1af 100644 --- a/httomolibgpu/prep/alignment.py +++ b/httomolibgpu/prep/alignment.py @@ -21,26 +21,12 @@ """Modules for data correction""" import numpy as np -cupy_run = False -try: - import cupy as xp - - try: - xp.cuda.Device(0).compute_capability - cupy_run = True - except xp.cuda.runtime.CUDARuntimeError: - print("CuPy library is a major dependency for HTTomolibgpu, please install") - import numpy as xp -except ImportError: - import numpy as xp +from httomolibgpu import cupywrapper -from typing import Dict, List -import nvtx +cp = cupywrapper.cp +nvtx = cupywrapper.nvtx -if cupy_run: - from cupyx.scipy.ndimage import map_coordinates -else: - from scipy.ndimage import map_coordinates +from typing import Dict, List __all__ = [ "distortion_correction_proj_discorpy", @@ -52,9 +38,56 @@ # (which is the same as the TomoPy version # https://github.com/tomopy/tomopy/blob/c236a2969074f5fc70189fb5545f0a165924f916/source/tomopy/prep/alignment.py#L950-L981 # but with the additional params `order` and `mode`). -@nvtx.annotate() def distortion_correction_proj_discorpy( - data: xp.ndarray, + data: cp.ndarray, + metadata_path: str, + preview: Dict[str, List[int]], + order: int = 1, + mode: str = "reflect", +): + """Unwarp a stack of images using a backward model. + + Parameters + ---------- + data : cp.ndarray + 3D array. + + metadata_path : str + The path to the file containing the distortion coefficients for the + data. + + preview : Dict[str, List[int]] + A dict containing three key-value pairs: + - a list containing the `start` value of each dimension + - a list containing the `stop` value of each dimension + - a list containing the `step` value of each dimension + + order : int, optional. + The order of the spline interpolation. + + mode : {'reflect', 'grid-mirror', 'constant', 'grid-constant', 'nearest', + 'mirror', 'grid-wrap', 'wrap'}, optional + To determine how to handle image boundaries. + + Returns + ------- + cp.ndarray + 3D array. Distortion-corrected image(s). 
+ """ + if cupywrapper.cupy_run: + return __distortion_correction_proj_discorpy( + data, metadata_path, preview, order, mode + ) + else: + print( + "distortion_correction_proj_discorpy won't be executed because CuPy is not installed" + ) + return data + + +@nvtx.annotate() +def __distortion_correction_proj_discorpy( + data: cp.ndarray, metadata_path: str, preview: Dict[str, List[int]], order: int = 1, @@ -89,9 +122,12 @@ def distortion_correction_proj_discorpy( cp.ndarray 3D array. Distortion-corrected image(s). """ + + from cupyx.scipy.ndimage import map_coordinates + # Check if it's a stack of 2D images, or only a single 2D image if len(data.shape) == 2: - data = xp.expand_dims(data, axis=0) + data = cp.expand_dims(data, axis=0) # Get info from metadata txt file xcenter, ycenter, list_fact = _load_metadata_txt(metadata_path) @@ -118,26 +154,26 @@ def distortion_correction_proj_discorpy( ycenter = ycenter - y_offset height, width = data.shape[y_dim + 1], data.shape[x_dim + 1] - xu_list = xp.arange(width) - xcenter - yu_list = xp.arange(height) - ycenter - xu_mat, yu_mat = xp.meshgrid(xu_list, yu_list) - ru_mat = xp.sqrt(xu_mat**2 + yu_mat**2) - fact_mat = xp.sum( - xp.asarray([factor * ru_mat**i for i, factor in enumerate(list_fact)]), axis=0 + xu_list = cp.arange(width) - xcenter + yu_list = cp.arange(height) - ycenter + xu_mat, yu_mat = cp.meshgrid(xu_list, yu_list) + ru_mat = cp.sqrt(xu_mat**2 + yu_mat**2) + fact_mat = cp.sum( + cp.asarray([factor * ru_mat**i for i, factor in enumerate(list_fact)]), axis=0 ) - xd_mat = xp.asarray( - xp.clip(xcenter + fact_mat * xu_mat, 0, width - 1), dtype=xp.float32 + xd_mat = cp.asarray( + cp.clip(xcenter + fact_mat * xu_mat, 0, width - 1), dtype=cp.float32 ) - yd_mat = xp.asarray( - xp.clip(ycenter + fact_mat * yu_mat, 0, height - 1), dtype=xp.float32 + yd_mat = cp.asarray( + cp.clip(ycenter + fact_mat * yu_mat, 0, height - 1), dtype=cp.float32 ) - indices = [xp.reshape(yd_mat, (-1, 1)), xp.reshape(xd_mat, (-1, 1))] - indices = xp.asarray(indices, dtype=xp.float32) + indices = [cp.reshape(yd_mat, (-1, 1)), cp.reshape(xd_mat, (-1, 1))] + indices = cp.asarray(indices, dtype=cp.float32) # Loop over images and unwarp them for i in range(data.shape[0]): mat = map_coordinates(data[i], indices, order=order, mode=mode) - mat = xp.reshape(mat, (height, width)) + mat = cp.reshape(mat, (height, width)) data[i] = mat return data diff --git a/httomolibgpu/prep/normalize.py b/httomolibgpu/prep/normalize.py index d8c1625a..bcbb2c20 100644 --- a/httomolibgpu/prep/normalize.py +++ b/httomolibgpu/prep/normalize.py @@ -20,41 +20,28 @@ # --------------------------------------------------------------------------- """Modules for raw projection data normalization""" -cupy_run = False -try: - import cupy as xp - - try: - xp.cuda.Device(0).compute_capability - cupy_run = True - except xp.cuda.runtime.CUDARuntimeError: - print("CuPy library is a major dependency for HTTomolibgpu, please install") - import numpy as xp -except ImportError: - import numpy as xp - -if cupy_run: - from cupy import mean -else: - from numpy import mean -import nvtx import numpy as np +from httomolibgpu import cupywrapper + +cp = cupywrapper.cp + +nvtx = cupywrapper.nvtx + from numpy import float32 from typing import Tuple __all__ = ["normalize"] -@nvtx.annotate() def normalize( - data: xp.ndarray, - flats: xp.ndarray, - darks: xp.ndarray, + data: cp.ndarray, + flats: cp.ndarray, + darks: cp.ndarray, cutoff: float = 10.0, minus_log: bool = True, nonnegativity: bool = False, remove_nans: bool = 
False, -) -> xp.ndarray: +) -> cp.ndarray: """ Normalize raw projection data using the flat and dark field projections. This is a raw CUDA kernel implementation with CuPy wrappers. @@ -81,12 +68,33 @@ def normalize( cp.ndarray Normalised 3D tomographic data as a CuPy array. """ + if cupywrapper.cupy_run: + return __normalize( + data, flats, darks, cutoff, minus_log, nonnegativity, remove_nans + ) + else: + print("normalize won't be executed because CuPy is not installed") + return data + + +@nvtx.annotate() +def __normalize( + data: cp.ndarray, + flats: cp.ndarray, + darks: cp.ndarray, + cutoff: float = 10.0, + minus_log: bool = True, + nonnegativity: bool = False, + remove_nans: bool = False, +) -> cp.ndarray: + + from cupy import mean _check_valid_input(data, flats, darks) - dark0 = xp.empty(darks.shape[1:], dtype=float32) - flat0 = xp.empty(flats.shape[1:], dtype=float32) - out = xp.empty(data.shape, dtype=float32) + dark0 = cp.empty(darks.shape[1:], dtype=float32) + flat0 = cp.empty(flats.shape[1:], dtype=float32) + out = cp.empty(data.shape, dtype=float32) mean(darks, axis=0, dtype=float32, out=dark0) mean(flats, axis=0, dtype=float32, out=flat0) @@ -111,7 +119,7 @@ def normalize( kernel += "if (v > cutoff) v = cutoff;\n" kernel += "out = v;\n" - normalisation_kernel = xp.ElementwiseKernel( + normalisation_kernel = cp.ElementwiseKernel( "T data, U flats, U darks, raw float32 cutoff", "float32 out", kernel, @@ -138,6 +146,6 @@ def _check_valid_input(data, flats, darks) -> None: raise ValueError("Input darks must be 2D or 3D data only") if flats.ndim == 2: - flats = flats[xp.newaxis, :, :] + flats = flats[cp.newaxis, :, :] if darks.ndim == 2: - darks = darks[xp.newaxis, :, :] + darks = darks[cp.newaxis, :, :] diff --git a/httomolibgpu/prep/phase.py b/httomolibgpu/prep/phase.py index 54e69e45..a6febe10 100644 --- a/httomolibgpu/prep/phase.py +++ b/httomolibgpu/prep/phase.py @@ -21,21 +21,14 @@ """Modules for phase retrieval and phase-contrast enhancement""" import numpy as np -cupy_run = False -try: - import cupy as xp - try: - xp.cuda.Device(0).compute_capability - cupy_run = True - except xp.cuda.runtime.CUDARuntimeError: - print("CuPy library is a major dependency for HTTomolibgpu, please install") - import numpy as np -except ImportError: - import numpy as np +from httomolibgpu import cupywrapper + +cp = cupywrapper.cp + +nvtx = cupywrapper.nvtx from numpy import float32 from typing import Union -import nvtx import math __all__ = [ @@ -43,11 +36,11 @@ "paganin_filter_tomopy", ] + ## %%%%%%%%%%%%%%%%%%%%%%% paganin_filter %%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## #: CuPy implementation of Paganin filter from Savu -@nvtx.annotate() def paganin_filter_savu( - data: xp.ndarray, + data: cp.ndarray, ratio: float = 250.0, energy: float = 53.0, distance: float = 1.0, @@ -56,7 +49,7 @@ def paganin_filter_savu( pad_x: int = 100, pad_method: str = "edge", increment: float = 0.0, -) -> xp.ndarray: +) -> cp.ndarray: """ Apply Paganin filter (for denoising or contrast enhancement) to projections. @@ -94,15 +87,27 @@ def paganin_filter_savu( ------- cp.ndarray The stack of filtered projections. 
- """ - if cupy_run: - return __paganin_filter_savu(data, ratio, energy, distance, resolution, pad_y, pad_x, pad_method, increment) + """ + if cupywrapper.cupy_run: + return __paganin_filter_savu( + data, + ratio, + energy, + distance, + resolution, + pad_y, + pad_x, + pad_method, + increment, + ) else: print("__paganin_filter_savu won't be executed because CuPy is not installed") return data + +@nvtx.annotate() def __paganin_filter_savu( - data: xp.ndarray, + data: cp.ndarray, ratio: float = 250.0, energy: float = 53.0, distance: float = 1.0, @@ -111,8 +116,8 @@ def __paganin_filter_savu( pad_x: int = 100, pad_method: str = "edge", increment: float = 0.0, -) -> xp.ndarray: - +) -> cp.ndarray: + from httomolibgpu.cuda_kernels import load_cuda_module from cupyx.scipy.fft import fft2, ifft2 @@ -144,10 +149,10 @@ def __paganin_filter_savu( # Apply padding to all the 2D projections # Note: this takes considerable time on GPU... - data = xp.pad(data, ((0, 0), (pad_y, pad_y), (pad_x, pad_x)), mode=pad_method) + data = cp.pad(data, ((0, 0), (pad_y, pad_y), (pad_x, pad_x)), mode=pad_method) # Define array to hold result, which will not have the padding applied to it - precond_kernel_float = xp.ElementwiseKernel( + precond_kernel_float = cp.ElementwiseKernel( "T data", "T out", """ @@ -164,7 +169,7 @@ def __paganin_filter_savu( name="paganin_precond_float", no_return=True, ) - precond_kernel_int = xp.ElementwiseKernel( + precond_kernel_int = cp.ElementwiseKernel( "T data", "T out", """out = data == 0 ? 1 : data""", @@ -172,17 +177,17 @@ def __paganin_filter_savu( no_return=True, ) - if data.dtype in (xp.float32, xp.float64): + if data.dtype in (cp.float32, cp.float64): precond_kernel_float(data, data) else: precond_kernel_int(data, data) # avoid normalising in both directions - we include multiplier in the post_kernel - data = xp.asarray(data, dtype=xp.complex64) + data = cp.asarray(data, dtype=cp.complex64) data = fft2(data, axes=(-2, -1), overwrite_x=True, norm="backward") # prepare filter here, while the GPU is busy with the FFT - filtercomplex = xp.empty((height1, width1), dtype=xp.complex64) + filtercomplex = cp.empty((height1, width1), dtype=cp.complex64) bx = 16 by = 8 gx = (width1 + bx - 1) // bx @@ -191,12 +196,12 @@ def __paganin_filter_savu( grid=(gx, gy, 1), block=(bx, by, 1), args=( - xp.int32(width1), - xp.int32(height1), - xp.float32(resolution), - xp.float32(wavelength), - xp.float32(distance), - xp.float32(ratio), + cp.int32(width1), + cp.int32(height1), + cp.float32(resolution), + cp.float32(wavelength), + cp.float32(distance), + cp.float32(ratio), filtercomplex, ), ) @@ -204,7 +209,7 @@ def __paganin_filter_savu( data = ifft2(data, axes=(-2, -1), overwrite_x=True, norm="forward") - post_kernel = xp.ElementwiseKernel( + post_kernel = cp.ElementwiseKernel( "C pci1, raw float32 increment, raw float32 ratio, raw float32 fft_scale", "T out", "out = -0.5 * ratio * log(abs(pci1) * fft_scale + increment)", @@ -212,7 +217,7 @@ def __paganin_filter_savu( no_return=True, ) fft_scale = 1.0 / (data.shape[1] * data.shape[2]) - res = xp.empty((data.shape[0], height, width), dtype=xp.float32) + res = cp.empty((data.shape[0], height, width), dtype=cp.float32) post_kernel( data[:, pad_y : pad_y + height, pad_x : pad_x + width], np.float32(increment), @@ -222,12 +227,14 @@ def __paganin_filter_savu( ) return res + def _wavelength(energy: float) -> float: SPEED_OF_LIGHT = 299792458e2 # [cm/s] PLANCK_CONSTANT = 6.58211928e-19 # [keV*s] return 2 * math.pi * PLANCK_CONSTANT * SPEED_OF_LIGHT / energy 
-def _reciprocal_grid(pixel_size: float, shape_proj: tuple) -> xp.ndarray: + +def _reciprocal_grid(pixel_size: float, shape_proj: tuple) -> cp.ndarray: """ Calculate reciprocal grid. @@ -246,12 +253,13 @@ def _reciprocal_grid(pixel_size: float, shape_proj: tuple) -> xp.ndarray: # Sampling in reciprocal space. indx = _reciprocal_coord(pixel_size, shape_proj[0]) indy = _reciprocal_coord(pixel_size, shape_proj[1]) - indx_sq = xp.square(indx) - indy_sq = xp.square(indy) + indx_sq = cp.square(indx) + indy_sq = cp.square(indy) + + return cp.add.outer(indx_sq, indy_sq) - return xp.add.outer(indx_sq, indy_sq) -def _reciprocal_coord(pixel_size: float, num_grid: int) -> xp.ndarray: +def _reciprocal_coord(pixel_size: float, num_grid: int) -> cp.ndarray: """ Calculate reciprocal grid coordinates for a given pixel size and discretization. @@ -269,22 +277,21 @@ def _reciprocal_coord(pixel_size: float, num_grid: int) -> xp.ndarray: Grid coordinates. """ n = num_grid - 1 - rc = xp.arange(-n, num_grid, 2, dtype=xp.float32) + rc = cp.arange(-n, num_grid, 2, dtype=cp.float32) rc *= 2 * math.pi / (n * pixel_size) return rc + ##-------------------------------------------------------------## ##-------------------------------------------------------------## -# Adaptation with some corrections of retrieve_phase (Paganin filter) -# from TomoPy -@nvtx.annotate() +# Adaptation of retrieve_phase (Paganin filter) from TomoPy def paganin_filter_tomopy( - tomo: xp.ndarray, + tomo: cp.ndarray, pixel_size: float = 1e-4, dist: float = 50.0, energy: float = 53.0, alpha: float = 1e-3, -) -> xp.ndarray: +) -> cp.ndarray: """ Perform single-material phase retrieval from flats/darks corrected tomographic measurements :cite:`Paganin:02`. @@ -307,19 +314,21 @@ def paganin_filter_tomopy( cp.ndarray The 3D array of Paganin phase-filtered projection images. """ - if cupy_run: + if cupywrapper.cupy_run: return __paganin_filter_tomopy(tomo, pixel_size, dist, energy, alpha) else: print("paganin_filter_tomopy won't be executed because CuPy is not installed") return tomo + +@nvtx.annotate() def __paganin_filter_tomopy( - tomo: xp.ndarray, + tomo: cp.ndarray, pixel_size: float = 1e-4, dist: float = 50.0, energy: float = 53.0, alpha: float = 1e-3, -) -> xp.ndarray: +) -> cp.ndarray: from cupyx.scipy.fft import fft2, ifft2, fftshift @@ -339,16 +348,14 @@ def __paganin_filter_tomopy( dz, dy, dx = padded_tomo.shape # 3D FFT of tomo data - padded_tomo = xp.asarray(padded_tomo, dtype=xp.complex64) + padded_tomo = cp.asarray(padded_tomo, dtype=cp.complex64) fft_tomo = fft2(padded_tomo, axes=(-2, -1), overwrite_x=True) # Compute the reciprocal grid. w2 = _reciprocal_grid(pixel_size, (dy, dx)) # Build filter in the Fourier space. 
- phase_filter = fftshift( - _paganin_filter_factor2(energy, dist, alpha, w2) - ) + phase_filter = fftshift(_paganin_filter_factor2(energy, dist, alpha, w2)) phase_filter = phase_filter / phase_filter.max() # normalisation # Apply filter and take inverse FFT @@ -364,10 +371,10 @@ def __paganin_filter_tomopy( ) # crop the padded filtered data: - tomo = ifft_filtered_tomo[slc_indices].astype(xp.float32) + tomo = ifft_filtered_tomo[slc_indices].astype(cp.float32) # taking the negative log - _log_kernel = xp.ElementwiseKernel( + _log_kernel = cp.ElementwiseKernel( "C tomo", "C out", "out = -log(tomo)", @@ -380,7 +387,8 @@ def __paganin_filter_tomopy( def _shift_bit_length(x: int) -> int: return 1 << (x - 1).bit_length() -def _pad_projections_to_second_power(tomo: xp.ndarray) -> Union[xp.ndarray, tuple]: + +def _pad_projections_to_second_power(tomo: cp.ndarray) -> Union[cp.ndarray, tuple]: """ Performs padding of each projection to the next power of 2. If the shape is not even we also care of that before padding. @@ -395,7 +403,7 @@ def _pad_projections_to_second_power(tomo: xp.ndarray) -> Union[xp.ndarray, tupl ndarray: padded 3d projection data tuple: a tuple with padding dimensions """ - full_shape_tomo = xp.shape(tomo) + full_shape_tomo = cp.shape(tomo) pad_tup = [] for index, element in enumerate(full_shape_tomo): @@ -414,10 +422,11 @@ def _pad_projections_to_second_power(tomo: xp.ndarray) -> Union[xp.ndarray, tupl pad_tup.append(pad_width) - padded_tomo = xp.pad(tomo, tuple(pad_tup), "edge") + padded_tomo = cp.pad(tomo, tuple(pad_tup), "edge") return padded_tomo, pad_tup + def _paganin_filter_factor2(energy, dist, alpha, w2): # Alpha represents the ratio of delta/beta. return 1 / (_wavelength(energy) * dist * w2 / (4 * math.pi) + alpha) diff --git a/httomolibgpu/prep/stripe.py b/httomolibgpu/prep/stripe.py index f55a3e31..46ba04a0 100644 --- a/httomolibgpu/prep/stripe.py +++ b/httomolibgpu/prep/stripe.py @@ -21,25 +21,11 @@ """Modules for stripes removal""" import numpy as np -cupy_run = False -try: - import cupy as xp - - try: - xp.cuda.Device(0).compute_capability - cupy_run = True - except xp.cuda.runtime.CUDARuntimeError: - print("CuPy library is a major dependency for HTTomolibgpu, please install") - import numpy as xp -except ImportError: - import numpy as xp - -if cupy_run: - from cupyx.scipy.ndimage import median_filter, binary_dilation, uniform_filter1d -else: - from scipy.ndimage import median_filter, binary_dilation, uniform_filter1d - -import nvtx +from httomolibgpu import cupywrapper + +cp = cupywrapper.cp + +nvtx = cupywrapper.nvtx from typing import Union __all__ = [ @@ -49,12 +35,11 @@ ] -@nvtx.annotate() def remove_stripe_based_sorting( - data: Union[xp.ndarray, np.ndarray], + data: Union[cp.ndarray, np.ndarray], size: int = 11, dim: int = 1, -) -> Union[xp.ndarray, np.ndarray]: +) -> Union[cp.ndarray, np.ndarray]: """ Remove full and partial stripe artifacts from sinogram using Nghia Vo's approach, algorithm 3 in Ref. [1]. Angular direction is along the axis 0. @@ -84,6 +69,21 @@ def remove_stripe_based_sorting( ---------- .. 
[1] https://doi.org/10.1364/OE.26.028396 """ + if cupywrapper.cupy_run: + return __remove_stripe_based_sorting(data, size, dim) + else: + print( + "remove_stripe_based_sorting won't be executed because CuPy is not installed" + ) + return data + + +@nvtx.annotate() +def __remove_stripe_based_sorting( + data: Union[cp.ndarray, np.ndarray], + size: int = 11, + dim: int = 1, +) -> Union[cp.ndarray, np.ndarray]: if size is None: if data.shape[2] > 2000: @@ -102,28 +102,29 @@ def _rs_sort(sinogram, size, dim): """ Remove stripes using the sorting technique. """ - sinogram = xp.transpose(sinogram) + from cupyx.scipy.ndimage import median_filter + + sinogram = cp.transpose(sinogram) #: Sort each column of the sinogram by its grayscale values #: Keep track of the sorting indices so we can reverse it below - sortvals = xp.argsort(sinogram, axis=1) - sortvals_reverse = xp.argsort(sortvals, axis=1) - sino_sort = xp.take_along_axis(sinogram, sortvals, axis=1) + sortvals = cp.argsort(sinogram, axis=1) + sortvals_reverse = cp.argsort(sortvals, axis=1) + sino_sort = cp.take_along_axis(sinogram, sortvals, axis=1) #: Now apply the median filter on the sorted image along each row sino_sort = median_filter(sino_sort, (size, 1) if dim == 1 else (size, size)) #: step 3: re-sort the smoothed image columns to the original rows - sino_corrected = xp.take_along_axis(sino_sort, sortvals_reverse, axis=1) + sino_corrected = cp.take_along_axis(sino_sort, sortvals_reverse, axis=1) - return xp.transpose(sino_corrected) + return cp.transpose(sino_corrected) -@nvtx.annotate() def remove_stripe_ti( - data: Union[xp.ndarray, np.ndarray], + data: Union[cp.ndarray, np.ndarray], beta: float = 0.1, -) -> Union[xp.ndarray, np.ndarray]: +) -> Union[cp.ndarray, np.ndarray]: """ Removes stripes with the method of V. Titarenko (TomoCuPy implementation) @@ -140,14 +141,27 @@ def remove_stripe_ti( ndarray 3D array of de-striped projections. """ + if cupywrapper.cupy_run: + return __remove_stripe_ti(data, beta) + else: + print("remove_stripe_ti won't be executed because CuPy is not installed") + return data + + +@nvtx.annotate() +def __remove_stripe_ti( + data: Union[cp.ndarray, np.ndarray], + beta: float = 0.1, +) -> Union[cp.ndarray, np.ndarray]: + # TODO: detector dimensions must be even otherwise error - gamma = beta * ((1 - beta) / (1 + beta)) ** xp.abs( - xp.fft.fftfreq(data.shape[-1]) * data.shape[-1] + gamma = beta * ((1 - beta) / (1 + beta)) ** cp.abs( + cp.fft.fftfreq(data.shape[-1]) * data.shape[-1] ) gamma[0] -= 1 - v = xp.mean(data, axis=0) + v = cp.mean(data, axis=0) v = v - v[:, 0:1] - v = xp.fft.irfft(xp.fft.rfft(v) * xp.fft.rfft(gamma)).astype(data.dtype) + v = cp.fft.irfft(cp.fft.rfft(v) * cp.fft.rfft(gamma)).astype(data.dtype) data[:] += v return data @@ -176,14 +190,13 @@ def remove_stripe_ti( # # # # # *************************************************************************** # -@nvtx.annotate() def remove_all_stripe( - data: xp.ndarray, + data: cp.ndarray, snr: float = 3.0, la_size: int = 61, sm_size: int = 21, dim: int = 1, -) -> xp.ndarray: +) -> cp.ndarray: """ Remove all types of stripe artifacts from sinogram using Nghia Vo's approach :cite:`Vo:18` (combination of algorithm 3,4,5, and 6). @@ -212,6 +225,22 @@ def remove_all_stripe( .. 
[1] https://doi.org/10.1364/OE.26.028396 """ + if cupywrapper.cupy_run: + return __remove_all_stripe(data, snr, la_size, sm_size, dim) + else: + print("remove_all_stripe won't be executed because CuPy is not installed") + return data + + +@nvtx.annotate() +def __remove_all_stripe( + data: cp.ndarray, + snr: float = 3.0, + la_size: int = 61, + sm_size: int = 21, + dim: int = 1, +) -> cp.ndarray: + matindex = _create_matindex(data.shape[2], data.shape[0]) for m in range(data.shape[1]): sino = data[:, m, :] @@ -226,38 +255,40 @@ def _rs_sort2(sinogram, size, matindex, dim): """ Remove stripes using the sorting technique. """ - sinogram = xp.transpose(sinogram) - matcomb = xp.asarray(xp.dstack((matindex, sinogram))) + from cupyx.scipy.ndimage import median_filter + + sinogram = cp.transpose(sinogram) + matcomb = cp.asarray(cp.dstack((matindex, sinogram))) - # matsort = xp.asarray([row[row[:, 1].argsort()] for row in matcomb]) - ids = xp.argsort(matcomb[:, :, 1], axis=1) + # matsort = cp.asarray([row[row[:, 1].argsort()] for row in matcomb]) + ids = cp.argsort(matcomb[:, :, 1], axis=1) matsort = matcomb.copy() - matsort[:, :, 0] = xp.take_along_axis(matsort[:, :, 0], ids, axis=1) - matsort[:, :, 1] = xp.take_along_axis(matsort[:, :, 1], ids, axis=1) + matsort[:, :, 0] = cp.take_along_axis(matsort[:, :, 0], ids, axis=1) + matsort[:, :, 1] = cp.take_along_axis(matsort[:, :, 1], ids, axis=1) if dim == 1: matsort[:, :, 1] = median_filter(matsort[:, :, 1], (size, 1)) else: matsort[:, :, 1] = median_filter(matsort[:, :, 1], (size, size)) - # matsortback = xp.asarray([row[row[:, 0].argsort()] for row in matsort]) + # matsortback = cp.asarray([row[row[:, 0].argsort()] for row in matsort]) - ids = xp.argsort(matsort[:, :, 0], axis=1) + ids = cp.argsort(matsort[:, :, 0], axis=1) matsortback = matsort.copy() - matsortback[:, :, 0] = xp.take_along_axis(matsortback[:, :, 0], ids, axis=1) - matsortback[:, :, 1] = xp.take_along_axis(matsortback[:, :, 1], ids, axis=1) + matsortback[:, :, 0] = cp.take_along_axis(matsortback[:, :, 0], ids, axis=1) + matsortback[:, :, 1] = cp.take_along_axis(matsortback[:, :, 1], ids, axis=1) sino_corrected = matsortback[:, :, 1] - return xp.transpose(sino_corrected) + return cp.transpose(sino_corrected) @nvtx.annotate() def _mpolyfit(x, y): n = len(x) - x_mean = xp.mean(x) - y_mean = xp.mean(y) + x_mean = cp.mean(x) + y_mean = cp.mean(y) - Sxy = xp.sum(x * y) - n * x_mean * y_mean - Sxx = xp.sum(x * x) - n * x_mean * x_mean + Sxy = cp.sum(x * y) - n * x_mean * y_mean + Sxx = cp.sum(x * x) - n * x_mean * x_mean slope = Sxy / Sxx intercept = y_mean - slope * x_mean @@ -269,22 +300,23 @@ def _detect_stripe(listdata, snr): """ Algorithm 4 in :cite:`Vo:18`. Used to locate stripes. 
""" + numdata = len(listdata) - listsorted = xp.sort(listdata)[::-1] - xlist = xp.arange(0, numdata, 1.0) - ndrop = xp.int16(0.25 * numdata) - # (_slope, _intercept) = xp.polyfit(xlist[ndrop:-ndrop - 1], + listsorted = cp.sort(listdata)[::-1] + xlist = cp.arange(0, numdata, 1.0) + ndrop = cp.int16(0.25 * numdata) + # (_slope, _intercept) = cp.polyfit(xlist[ndrop:-ndrop - 1], # listsorted[ndrop:-ndrop - 1], 1) (_slope, _intercept) = _mpolyfit( xlist[ndrop : -ndrop - 1], listsorted[ndrop : -ndrop - 1] ) numt1 = _intercept + _slope * xlist[-1] - noiselevel = xp.abs(numt1 - _intercept) - noiselevel = xp.clip(noiselevel, 1e-6, None) - val1 = xp.abs(listsorted[0] - _intercept) / noiselevel - val2 = xp.abs(listsorted[-1] - numt1) / noiselevel - listmask = xp.zeros_like(listdata) + noiselevel = cp.abs(numt1 - _intercept) + noiselevel = cp.clip(noiselevel, 1e-6, None) + val1 = cp.abs(listsorted[0] - _intercept) / noiselevel + val2 = cp.abs(listsorted[-1] - numt1) / noiselevel + listmask = cp.zeros_like(listdata) if val1 >= snr: upper_thresh = _intercept + noiselevel * snr * 0.5 listmask[listdata > upper_thresh] = 1.0 @@ -299,16 +331,19 @@ def _rs_large(sinogram, snr, size, matindex, drop_ratio=0.1, norm=True): """ Remove large stripes. """ - drop_ratio = max(min(drop_ratio, 0.8), 0) # = xp.clip(drop_ratio, 0.0, 0.8) + from cupyx.scipy.ndimage import median_filter + from cupyx.scipy.ndimage import binary_dilation + + drop_ratio = max(min(drop_ratio, 0.8), 0) # = cp.clip(drop_ratio, 0.0, 0.8) (nrow, ncol) = sinogram.shape ndrop = int(0.5 * drop_ratio * nrow) - sinosort = xp.sort(sinogram, axis=0) + sinosort = cp.sort(sinogram, axis=0) sinosmooth = median_filter(sinosort, (1, size)) - list1 = xp.mean(sinosort[ndrop : nrow - ndrop], axis=0) - list2 = xp.mean(sinosmooth[ndrop : nrow - ndrop], axis=0) - # listfact = xp.divide(list1, + list1 = cp.mean(sinosort[ndrop : nrow - ndrop], axis=0) + list2 = cp.mean(sinosmooth[ndrop : nrow - ndrop], axis=0) + # listfact = cp.divide(list1, # list2, - # out=xp.ones_like(list1), + # out=cp.ones_like(list1), # where=list2 != 0) listfact = list1 / list2 @@ -316,28 +351,28 @@ def _rs_large(sinogram, snr, size, matindex, drop_ratio=0.1, norm=True): # Locate stripes listmask = _detect_stripe(listfact, snr) listmask = binary_dilation(listmask, iterations=1).astype(listmask.dtype) - matfact = xp.tile(listfact, (nrow, 1)) + matfact = cp.tile(listfact, (nrow, 1)) # Normalize if norm is True: sinogram = sinogram / matfact - sinogram1 = xp.transpose(sinogram) - matcombine = xp.asarray(xp.dstack((matindex, sinogram1))) + sinogram1 = cp.transpose(sinogram) + matcombine = cp.asarray(cp.dstack((matindex, sinogram1))) - # matsort = xp.asarray([row[row[:, 1].argsort()] for row in matcombine]) - ids = xp.argsort(matcombine[:, :, 1], axis=1) + # matsort = cp.asarray([row[row[:, 1].argsort()] for row in matcombine]) + ids = cp.argsort(matcombine[:, :, 1], axis=1) matsort = matcombine.copy() - matsort[:, :, 0] = xp.take_along_axis(matsort[:, :, 0], ids, axis=1) - matsort[:, :, 1] = xp.take_along_axis(matsort[:, :, 1], ids, axis=1) + matsort[:, :, 0] = cp.take_along_axis(matsort[:, :, 0], ids, axis=1) + matsort[:, :, 1] = cp.take_along_axis(matsort[:, :, 1], ids, axis=1) - matsort[:, :, 1] = xp.transpose(sinosmooth) - # matsortback = xp.asarray([row[row[:, 0].argsort()] for row in matsort]) - ids = xp.argsort(matsort[:, :, 0], axis=1) + matsort[:, :, 1] = cp.transpose(sinosmooth) + # matsortback = cp.asarray([row[row[:, 0].argsort()] for row in matsort]) + ids = cp.argsort(matsort[:, :, 
0], axis=1) matsortback = matsort.copy() - matsortback[:, :, 0] = xp.take_along_axis(matsortback[:, :, 0], ids, axis=1) - matsortback[:, :, 1] = xp.take_along_axis(matsortback[:, :, 1], ids, axis=1) + matsortback[:, :, 0] = cp.take_along_axis(matsortback[:, :, 0], ids, axis=1) + matsortback[:, :, 1] = cp.take_along_axis(matsortback[:, :, 1], ids, axis=1) - sino_corrected = xp.transpose(matsortback[:, :, 1]) - listxmiss = xp.where(listmask > 0.0)[0] + sino_corrected = cp.transpose(matsortback[:, :, 1]) + listxmiss = cp.where(listmask > 0.0)[0] sinogram[:, listxmiss] = sino_corrected[:, listxmiss] return sinogram @@ -347,12 +382,16 @@ def _rs_dead(sinogram, snr, size, matindex, norm=True): """ Remove unresponsive and fluctuating stripes. """ - sinogram = xp.copy(sinogram) # Make it mutable + from cupyx.scipy.ndimage import median_filter + from cupyx.scipy.ndimage import binary_dilation + from cupyx.scipy.ndimage import uniform_filter1d + + sinogram = cp.copy(sinogram) # Make it mutable (nrow, _) = sinogram.shape - # sinosmooth = xp.apply_along_axis(uniform_filter1d, 0, sinogram, 10) + # sinosmooth = cp.apply_along_axis(uniform_filter1d, 0, sinogram, 10) sinosmooth = uniform_filter1d(sinogram, 10, axis=0) - listdiff = xp.sum(xp.abs(sinogram - sinosmooth), axis=0) + listdiff = cp.sum(cp.abs(sinogram - sinosmooth), axis=0) listdiffbck = median_filter(listdiff, size) listfact = listdiff / listdiffbck @@ -361,16 +400,16 @@ def _rs_dead(sinogram, snr, size, matindex, norm=True): listmask = binary_dilation(listmask, iterations=1).astype(listmask.dtype) listmask[0:2] = 0.0 listmask[-2:] = 0.0 - listx = xp.where(listmask < 1.0)[0] - listy = xp.arange(nrow) + listx = cp.where(listmask < 1.0)[0] + listy = cp.arange(nrow) matz = sinogram[:, listx] - listxmiss = xp.where(listmask > 0.0)[0] + listxmiss = cp.where(listmask > 0.0)[0] # finter = interpolate.interp2d(listx.get(), listy.get(), matz.get(), kind='linear') if len(listxmiss) > 0: # sinogram_c[:, listxmiss.get()] = finter(listxmiss.get(), listy.get()) - ids = xp.searchsorted(listx, listxmiss) + ids = cp.searchsorted(listx, listxmiss) sinogram[:, listxmiss] = matz[:, ids - 1] + (listxmiss - listx[ids - 1]) * ( matz[:, ids] - matz[:, ids - 1] ) / (listx[ids] - listx[ids - 1]) @@ -386,6 +425,6 @@ def _create_matindex(nrow, ncol): """ Create a 2D array of indexes used for the sorting technique. 
""" - listindex = xp.arange(0.0, ncol, 1.0) - matindex = xp.tile(listindex, (nrow, 1)) + listindex = cp.arange(0.0, ncol, 1.0) + matindex = cp.tile(listindex, (nrow, 1)) return matindex.astype(np.float32) diff --git a/httomolibgpu/recon/algorithm.py b/httomolibgpu/recon/algorithm.py index 4b126066..1de5400d 100644 --- a/httomolibgpu/recon/algorithm.py +++ b/httomolibgpu/recon/algorithm.py @@ -21,30 +21,14 @@ """Module for tomographic reconstruction""" import numpy as np -cupy_run = False -try: - import cupy as xp - - try: - xp.cuda.Device(0).compute_capability - cupy_run = True - except xp.cuda.runtime.CUDARuntimeError: - print("CuPy library is a major dependency for HTTomolibgpu, please install") - import numpy as xp -except ImportError: - import numpy as xp - -import nvtx +from httomolibgpu import cupywrapper + +cp = cupywrapper.cp + +nvtx = cupywrapper.nvtx from numpy import float32, complex64 from typing import Optional, Type -if cupy_run: - from tomobar.methodsDIR_CuPy import RecToolsDIRCuPy - from tomobar.methodsIR_CuPy import RecToolsIRCuPy -else: - from tomobar.methodsDIR import RecToolsDIR as RecToolsDIRCuPy - from tomobar.methodsIR import RecToolsIR as RecToolsIRCuPy - __all__ = [ "FBP", "SIRT", @@ -55,16 +39,15 @@ ## %%%%%%%%%%%%%%%%%%%%%%% FBP reconstruction %%%%%%%%%%%%%%%%%%%%%%%%%%%% ## -@nvtx.annotate() def FBP( - data: xp.ndarray, + data: cp.ndarray, angles: np.ndarray, center: Optional[float] = None, filter_freq_cutoff: Optional[float] = 0.6, recon_size: Optional[int] = None, recon_mask_radius: Optional[float] = None, gpu_id: int = 0, -) -> xp.ndarray: +) -> cp.ndarray: """ Perform Filtered Backprojection (FBP) reconstruction using ASTRA toolbox and ToMoBAR wrappers. This is a 3D recon from a CuPy array and a custom built filter. @@ -94,6 +77,32 @@ def FBP( cp.ndarray The FBP reconstructed volume as a CuPy array. """ + if cupywrapper.cupy_run: + return __FBP( + data, + angles, + center, + filter_freq_cutoff, + recon_size, + recon_mask_radius, + gpu_id, + ) + else: + print("FBP won't be executed because CuPy is not installed") + return data + + +@nvtx.annotate() +def __FBP( + data: cp.ndarray, + angles: np.ndarray, + center: Optional[float] = None, + filter_freq_cutoff: Optional[float] = 0.6, + recon_size: Optional[int] = None, + recon_mask_radius: Optional[float] = None, + gpu_id: int = 0, +) -> cp.ndarray: + RecToolsCP = _instantiate_direct_recon_class( data, angles, center, recon_size, gpu_id ) @@ -104,21 +113,20 @@ def FBP( recon_mask_radius=recon_mask_radius, data_axes_labels_order=input_data_axis_labels, ) - xp._default_memory_pool.free_all_blocks() - return xp.require(xp.swapaxes(reconstruction, 0, 1), requirements="C") + cp._default_memory_pool.free_all_blocks() + return cp.require(cp.swapaxes(reconstruction, 0, 1), requirements="C") ## %%%%%%%%%%%%%%%%%%%%%%% SIRT reconstruction %%%%%%%%%%%%%%%%%%%%%%%%%%%% ## -@nvtx.annotate() def SIRT( - data: xp.ndarray, + data: cp.ndarray, angles: np.ndarray, center: Optional[float] = None, recon_size: Optional[int] = None, iterations: Optional[int] = 300, nonnegativity: Optional[bool] = True, gpu_id: int = 0, -) -> xp.ndarray: +) -> cp.ndarray: """ Perform Simultaneous Iterative Recostruction Technique (SIRT) using ASTRA toolbox and ToMoBAR wrappers. This is 3D recon directly from a CuPy array while using ASTRA GPUlink capability. @@ -146,6 +154,31 @@ def SIRT( cp.ndarray The SIRT reconstructed volume as a CuPy array. 
""" + if cupywrapper.cupy_run: + return __SIRT( + data, + angles, + center, + recon_size, + iterations, + nonnegativity, + gpu_id, + ) + else: + print("SIRT won't be executed because CuPy is not installed") + return data + + +@nvtx.annotate() +def __SIRT( + data: cp.ndarray, + angles: np.ndarray, + center: Optional[float] = None, + recon_size: Optional[int] = None, + iterations: Optional[int] = 300, + nonnegativity: Optional[bool] = True, + gpu_id: int = 0, +) -> cp.ndarray: RecToolsCP = _instantiate_iterative_recon_class( data, angles, center, recon_size, gpu_id, datafidelity="LS" @@ -160,21 +193,20 @@ def SIRT( "nonnegativity": nonnegativity, } reconstruction = RecToolsCP.SIRT(_data_, _algorithm_) - xp._default_memory_pool.free_all_blocks() - return xp.require(xp.swapaxes(reconstruction, 0, 1), requirements="C") + cp._default_memory_pool.free_all_blocks() + return cp.require(cp.swapaxes(reconstruction, 0, 1), requirements="C") ## %%%%%%%%%%%%%%%%%%%%%%% CGLS reconstruction %%%%%%%%%%%%%%%%%%%%%%%%%%%% ## -@nvtx.annotate() def CGLS( - data: xp.ndarray, + data: cp.ndarray, angles: np.ndarray, center: Optional[float] = None, recon_size: Optional[int] = None, iterations: Optional[int] = 20, nonnegativity: Optional[bool] = True, gpu_id: int = 0, -) -> xp.ndarray: +) -> cp.ndarray: """ Perform Congugate Gradient Least Squares (CGLS) using ASTRA toolbox and ToMoBAR wrappers. This is 3D recon directly from a CuPy array while using ASTRA GPUlink capability. @@ -202,6 +234,32 @@ def CGLS( cp.ndarray The CGLS reconstructed volume as a CuPy array. """ + if cupywrapper.cupy_run: + return __CGLS( + data, + angles, + center, + recon_size, + iterations, + nonnegativity, + gpu_id, + ) + else: + print("CGLS won't be executed because CuPy is not installed") + return data + + +@nvtx.annotate() +def __CGLS( + data: cp.ndarray, + angles: np.ndarray, + center: Optional[float] = None, + recon_size: Optional[int] = None, + iterations: Optional[int] = 20, + nonnegativity: Optional[bool] = True, + gpu_id: int = 0, +) -> cp.ndarray: + RecToolsCP = _instantiate_iterative_recon_class( data, angles, center, recon_size, gpu_id, datafidelity="LS" ) @@ -212,18 +270,18 @@ def CGLS( } # data dictionary _algorithm_ = {"iterations": iterations, "nonnegativity": nonnegativity} reconstruction = RecToolsCP.CGLS(_data_, _algorithm_) - xp._default_memory_pool.free_all_blocks() - return xp.require(xp.swapaxes(reconstruction, 0, 1), requirements="C") + cp._default_memory_pool.free_all_blocks() + return cp.require(cp.swapaxes(reconstruction, 0, 1), requirements="C") ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## def _instantiate_direct_recon_class( - data: xp.ndarray, + data: cp.ndarray, angles: np.ndarray, center: Optional[float] = None, recon_size: Optional[int] = None, gpu_id: int = 0, -) -> Type[RecToolsDIRCuPy]: +) -> Type: """instantiate ToMoBAR's direct recon class Args: @@ -236,6 +294,8 @@ def _instantiate_direct_recon_class( Returns: Type[RecToolsDIRCuPy]: an instance of the direct recon class """ + from tomobar.methodsDIR_CuPy import RecToolsDIRCuPy + if center is None: center = data.shape[2] // 2 # making a crude guess if recon_size is None: @@ -254,13 +314,13 @@ def _instantiate_direct_recon_class( def _instantiate_iterative_recon_class( - data: xp.ndarray, + data: cp.ndarray, angles: np.ndarray, center: Optional[float] = None, recon_size: Optional[int] = None, gpu_id: int = 0, datafidelity: str = "LS", -) -> Type[RecToolsIRCuPy]: +) -> Type: """instantiate ToMoBAR's iterative recon 
class Args: @@ -274,6 +334,8 @@ def _instantiate_iterative_recon_class( Returns: Type[RecToolsIRCuPy]: an instance of the iterative class """ + from tomobar.methodsIR_CuPy import RecToolsIRCuPy + if center is None: center = data.shape[2] // 2 # making a crude guess if recon_size is None: diff --git a/httomolibgpu/recon/rotation.py b/httomolibgpu/recon/rotation.py index 6e6feba1..dbade838 100644 --- a/httomolibgpu/recon/rotation.py +++ b/httomolibgpu/recon/rotation.py @@ -21,42 +21,24 @@ """Modules for finding the axis of rotation""" import numpy as np -cupy_run = False -try: - import cupy as xp +from httomolibgpu import cupywrapper - try: - xp.cuda.Device(0).compute_capability - cupy_run = True +cp = cupywrapper.cp - except xp.cuda.runtime.CUDARuntimeError: - print("CuPy library is a major dependency for HTTomolibgpu, please install") - import numpy as np -except ImportError: - import numpy as np +nvtx = cupywrapper.nvtx -import nvtx import math from typing import List, Literal, Optional, Tuple, Union -if cupy_run: - from httomolibgpu.cuda_kernels import load_cuda_module - from cupyx.scipy.ndimage import shift, gaussian_filter - from cupyx.scipy.fftpack import get_fft_plan - from cupyx.scipy.fft import rfft2 -else: - from scipy.ndimage import shift, gaussian_filter - from scipy.fft import fftfreq as get_fft_plan # get_fft_plan doesn't exist in scipyfft - from scipy.fft import rfft2 - __all__ = [ "find_center_vo", - #"find_center_360", - #"find_center_pc", + "find_center_360", + "find_center_pc", ] + def find_center_vo( - data: xp.ndarray, + data: cp.ndarray, ind: Optional[int] = None, smin: int = -50, smax: int = 50, @@ -96,12 +78,17 @@ def find_center_vo( float Rotation axis location. """ - return __find_center_vo(data, ind, smin, smax, srad, step, ratio, drop) + if cupywrapper.cupy_run: + return __find_center_vo(data, ind, smin, smax, srad, step, ratio, drop) + else: + print("find_center_vo won't be executed because CuPy is not installed") + return 0.0 + # %%%%%%%%%%%%%%%%%%%%%%%%%find_center_vo%%%%%%%%%%%%%%%%%%%%%%%%%%%% @nvtx.annotate() def __find_center_vo( - data: xp.ndarray, + data: cp.ndarray, ind: Optional[int] = None, smin: int = -50, smax: int = 50, @@ -111,8 +98,10 @@ def __find_center_vo( drop: int = 20, ) -> float: + from cupyx.scipy.ndimage import gaussian_filter + if data.ndim == 2: - data = xp.expand_dims(data, 1) + data = cp.expand_dims(data, 1) ind = 0 height = data.shape[1] @@ -120,7 +109,7 @@ def __find_center_vo( if ind is None: ind = height // 2 if height > 10: - _sino = xp.mean(data[:, ind - 5 : ind + 5, :], axis=1) + _sino = cp.mean(data[:, ind - 5 : ind + 5, :], axis=1) else: _sino = data[:, ind, :] else: @@ -141,14 +130,14 @@ def __find_center_vo( init_cen = _search_coarse(_sino_cs, smin, smax, ratio, drop) fine_cen = _search_fine(_sino_fs, srad, step, init_cen, ratio, drop) - return xp.asnumpy(fine_cen) + return cp.asnumpy(fine_cen) @nvtx.annotate() def _search_coarse(sino, smin, smax, ratio, drop): (nrow, ncol) = sino.shape - flip_sino = xp.ascontiguousarray(xp.fliplr(sino)) - comp_sino = xp.ascontiguousarray(xp.flipud(sino)) + flip_sino = cp.ascontiguousarray(cp.fliplr(sino)) + comp_sino = cp.ascontiguousarray(cp.flipud(sino)) mask = _create_mask(2 * nrow, ncol, 0.5 * ratio * ncol, drop) cen_fliplr = (ncol - 1.0) / 2.0 @@ -158,12 +147,12 @@ def _search_coarse(sino, smin, smax, ratio, drop): smax = smax_clip_val - cen_fliplr start_cor = ncol // 2 + smin stop_cor = ncol // 2 + smax - list_cor = xp.arange(start_cor, stop_cor + 0.5, 0.5, dtype=xp.float32) + 
list_cor = cp.arange(start_cor, stop_cor + 0.5, 0.5, dtype=cp.float32) list_shift = 2.0 * (list_cor - cen_fliplr) - list_metric = xp.empty(list_shift.shape, dtype=xp.float32) + list_metric = cp.empty(list_shift.shape, dtype=cp.float32) _calculate_metric(list_shift, sino, flip_sino, comp_sino, mask, list_metric) - minpos = xp.argmin(list_metric) + minpos = cp.argmin(list_metric) if minpos == 0: print("WARNING!!!Global minimum is out of searching range") print(f"Please extend smin: {smin}") @@ -178,25 +167,27 @@ def _search_coarse(sino, smin, smax, ratio, drop): def _search_fine(sino, srad, step, init_cen, ratio, drop): (nrow, ncol) = sino.shape - flip_sino = xp.ascontiguousarray(xp.fliplr(sino)) - comp_sino = xp.ascontiguousarray(xp.flipud(sino)) + flip_sino = cp.ascontiguousarray(cp.fliplr(sino)) + comp_sino = cp.ascontiguousarray(cp.flipud(sino)) mask = _create_mask(2 * nrow, ncol, 0.5 * ratio * ncol, drop) cen_fliplr = (ncol - 1.0) / 2.0 srad = max(min(abs(float(srad)), ncol / 4.0), 1.0) step = max(min(abs(step), srad), 0.1) init_cen = max(min(init_cen, ncol - srad - 1), srad) - list_cor = init_cen + xp.arange(-srad, srad + step, step, dtype=np.float32) + list_cor = init_cen + cp.arange(-srad, srad + step, step, dtype=np.float32) list_shift = 2.0 * (list_cor - cen_fliplr) - list_metric = xp.empty(list_shift.shape, dtype="float32") + list_metric = cp.empty(list_shift.shape, dtype="float32") _calculate_metric(list_shift, sino, flip_sino, comp_sino, mask, out=list_metric) - cor = list_cor[xp.argmin(list_metric)] + cor = list_cor[cp.argmin(list_metric)] return cor @nvtx.annotate() def _create_mask(nrow, ncol, radius, drop): + from httomolibgpu.cuda_kernels import load_cuda_module + du = 1.0 / ncol dv = (nrow - 1.0) / (nrow * 2.0 * np.pi) cen_row = int(math.ceil(nrow / 2.0) - 1) @@ -209,16 +200,16 @@ def _create_mask(nrow, ncol, radius, drop): grid_x = (ncol // 2 + 1 + block_x - 1) // block_x grid_y = nrow grid_dims = (grid_x, grid_y) - mask = xp.empty((nrow, ncol // 2 + 1), dtype="uint16") + mask = cp.empty((nrow, ncol // 2 + 1), dtype="uint16") params = ( ncol, nrow, cen_col, cen_row, - xp.float32(du), - xp.float32(dv), - xp.float32(radius), - xp.float32(drop), + cp.float32(du), + cp.float32(dv), + cp.float32(radius), + cp.float32(drop), mask, ) module = load_cuda_module("generate_mask") @@ -235,12 +226,12 @@ def round_up(x: float) -> int: def _get_available_gpu_memory() -> int: - dev = xp.cuda.Device() + dev = cp.cuda.Device() # first, let's make some space - xp.get_default_memory_pool().free_all_blocks() - cache = xp.fft.config.get_plan_cache() + cp.get_default_memory_pool().free_all_blocks() + cache = cp.fft.config.get_plan_cache() cache.clear() - available_memory = dev.mem_info[0] + xp.get_default_memory_pool().free_bytes() + available_memory = dev.mem_info[0] + cp.get_default_memory_pool().free_bytes() return int(available_memory * 0.9) # 10% safety margin @@ -268,12 +259,17 @@ def _calculate_chunks( @nvtx.annotate() def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out): + from httomolibgpu.cuda_kernels import load_cuda_module + from cupyx.scipy.ndimage import shift + from cupyx.scipy.fftpack import get_fft_plan + from cupyx.scipy.fft import rfft2 + # this tries to simplify - if shift_col is integer, no need to spline interpolate - assert list_shift.dtype == xp.float32, "shifts must be single precision floats" - assert sino1.dtype == xp.float32, "sino1 must be float32" - assert sino2.dtype == xp.float32, "sino1 must be float32" - assert sino3.dtype == xp.float32, "sino1 
must be float32" - assert out.dtype == xp.float32, "sino1 must be float32" + assert list_shift.dtype == cp.float32, "shifts must be single precision floats" + assert sino1.dtype == cp.float32, "sino1 must be float32" + assert sino2.dtype == cp.float32, "sino1 must be float32" + assert sino3.dtype == cp.float32, "sino1 must be float32" + assert out.dtype == cp.float32, "sino1 must be float32" assert sino2.flags["C_CONTIGUOUS"], "sino2 must be C-contiguous" assert sino3.flags["C_CONTIGUOUS"], "sino3 must be C-contiguous" assert list_shift.flags["C_CONTIGUOUS"], "list_shift must be C-contiguous" @@ -285,7 +281,7 @@ def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out): shift_whole_shifts = module.get_function("shift_whole_shifts") # note: we don't have to calculate the mean here, as we're only looking for minimum metric. # The sum is enough. - masked_sum_abs_kernel = xp.ReductionKernel( + masked_sum_abs_kernel = cp.ReductionKernel( in_params="complex64 x, uint16 mask", # input, complex + mask out_params="float32 out", # output, real map_expr="mask ? abs(x) : 0.0f", @@ -299,21 +295,19 @@ def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out): # determine how many shifts we can fit in the available memory # and iterate in chunks chunks = _calculate_chunks( - nshifts, (na1 + na2) * sino2.shape[1] * xp.float32().nbytes + nshifts, (na1 + na2) * sino2.shape[1] * cp.float32().nbytes ) - mat = xp.empty((chunks[0], na1 + na2, sino2.shape[1]), dtype=xp.float32) + mat = cp.empty((chunks[0], na1 + na2, sino2.shape[1]), dtype=cp.float32) mat[:, :na1, :] = sino1 # explicitly create FFT plan here, so it's not cached and clearly re-used - plan = get_fft_plan( - mat, mat.shape[-2:], axes=(1, 2), value_type="R2C" - ) + plan = get_fft_plan(mat, mat.shape[-2:], axes=(1, 2), value_type="R2C") for i, stop_idx in enumerate(chunks): if i > 0: # more than one iteration means we're tight on memory, so clear up freed blocks mat_freq = None - xp.get_default_memory_pool().free_all_blocks() + cp.get_default_memory_pool().free_all_blocks() start_idx = 0 if i == 0 else chunks[i - 1] size = stop_idx - start_idx @@ -336,7 +330,7 @@ def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out): ) # now we can only look at the spline shifting, the rest is done - list_shift_host = xp.asnumpy(list_shift[start_idx:stop_idx]) + list_shift_host = cp.asnumpy(list_shift[start_idx:stop_idx]) for i in range(list_shift_host.shape[0]): shift_col = float(list_shift_host[i]) if not shift_col.is_integer(): @@ -358,7 +352,9 @@ def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out): @nvtx.annotate() def _downsample(sino, level, axis): - assert sino.dtype == xp.float32, "single precision floating point input required" + from httomolibgpu.cuda_kernels import load_cuda_module + + assert sino.dtype == cp.float32, "single precision floating point input required" assert sino.flags["C_CONTIGUOUS"], "list_shift must be C-contiguous" dx, dz = sino.shape @@ -366,7 +362,7 @@ def _downsample(sino, level, axis): dim = int(sino.shape[axis] / math.pow(2, level)) shape = [dx, dz] shape[axis] = dim - downsampled_data = xp.empty(shape, dtype="float32") + downsampled_data = cp.empty(shape, dtype="float32") block_x = 8 block_y = 8 @@ -386,393 +382,423 @@ def _downsample(sino, level, axis): ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# # %%%%%%%%%%%%%%%%%%%%%%%%%find_center_360%%%%%%%%%%%%%%%%%%%%%%%%% +# --- Center of rotation (COR) estimation method ---# -# # 
-# ## %%%%%%%%%%%%%%%%%%%%%%%%%find_center_360%%%%%%%%%%%%%%%%%%%%%%%%%
-# # --- Center of rotation (COR) estimation method ---#
-# @nvtx.annotate()
-# def find_center_360(
-#     data: xp.ndarray,
-#     ind: Optional[int] = None,
-#     win_width: int = 10,
-#     side: Optional[Literal[0, 1]] = None,
-#     denoise: bool = True,
-#     norm: bool = False,
-#     use_overlap: bool = False,
-# ) -> Tuple[float, float, Optional[Literal[0, 1]], float]:
-#     """
-#     Find the center-of-rotation (COR) in a 360-degree scan with offset COR use
-#     the method presented in Ref. [1] by Nghia Vo.
-
-#     This function supports both numpy and cupy - the implementation is selected
-#     by where the input data array resides.
-
-#     Parameters
-#     ----------
-#     data : cp.ndarray
-#         3D tomographic data as a Cupy array.
-#     ind : int, optional
-#         Index of the slice to be used for estimate the CoR and the overlap.
-#     win_width : int, optional
-#         Window width used for finding the overlap area.
-#     side : {None, 0, 1}, optional
-#         Overlap size. Only there options: None, 0, or 1. "None" corresponds
-#         to fully automated determination. "0" corresponds to the left side.
-#         "1" corresponds to the right side.
-#     denoise : bool, optional
-#         Apply the Gaussian filter if True.
-#     norm : bool, optional
-#         Apply the normalisation if True.
-#     use_overlap : bool, optional
-#         Use the combination of images in the overlap area for calculating
-#         correlation coefficients if True.
-
-#     Returns
-#     -------
-#     cor : float
-#         Center-of-rotation.
-#     overlap : float
-#         Width of the overlap area between two halves of the sinogram.
-#     side : int
-#         Overlap side between two halves of the sinogram.
-#     overlap_position : float
-#         Position of the window in the first image giving the best
-#         correlation metric.
-
-#     References
-#     ----------
-#     [1] : https://doi.org/10.1364/OE.418448
-#     """
-#     if data.ndim != 3:
-#         raise ValueError("A 3D array must be provided")
-
-#     # this method works with a 360-degree sinogram.
-#     if ind is None:
-#         _sino = data[:, 0, :]
-#     else:
-#         _sino = data[:, ind, :]
-
-#     (nrow, ncol) = _sino.shape
-#     nrow_180 = nrow // 2 + 1
-#     sino_top = _sino[0:nrow_180, :]
-#     sino_bot = xp.fliplr(_sino[-nrow_180:, :])
-#     (overlap, side, overlap_position) = _find_overlap(
-#         sino_top, sino_bot, win_width, side, denoise, norm, use_overlap
-#     )
-#     if side == 0:
-#         cor = overlap / 2.0 - 1.0
-#     else:
-#         cor = ncol - overlap / 2.0 - 1.0
-
-#     return cor, overlap, side, overlap_position
-
-
-# def _find_overlap(
-#     mat1, mat2, win_width, side=None, denoise=True, norm=False, use_overlap=False
-# ):
-#     """
-#     Find the overlap area and overlap side between two images (Ref. [1]) where
-#     the overlap side referring to the first image.
-
-#     Parameters
-#     ----------
-#     mat1 : array_like
-#         2D array. Projection image or sinogram image.
-#     mat2 : array_like
-#         2D array. Projection image or sinogram image.
-#     win_width : int
-#         Width of the searching window.
-#     side : {None, 0, 1}, optional
-#         Only there options: None, 0, or 1. "None" corresponding to fully
-#         automated determination. "0" corresponding to the left side. "1"
-#         corresponding to the right side.
-#     denoise : bool, optional
-#         Apply the Gaussian filter if True.
-#     norm : bool, optional
-#         Apply the normalization if True.
-#     use_overlap : bool, optional
-#         Use the combination of images in the overlap area for calculating
-#         correlation coefficients if True.
-
-#     Returns
-#     -------
-#     overlap : float
-#         Width of the overlap area between two images.
-#     side : int
-#         Overlap side between two images.
-#     overlap_position : float
-#         Position of the window in the first image giving the best
-#         correlation metric.
-
-#     """
-#     ncol1 = mat1.shape[1]
-#     ncol2 = mat2.shape[1]
-#     win_width = int(np.clip(win_width, 6, min(ncol1, ncol2) // 2))
-
-#     if side == 1:
-#         (list_metric, offset) = _search_overlap(
-#             mat1,
-#             mat2,
-#             win_width,
-#             side=side,
-#             denoise=denoise,
-#             norm=norm,
-#             use_overlap=use_overlap,
-#         )
-#         overlap_position = _calculate_curvature(list_metric)[1]
-#         overlap_position += offset
-#         overlap = ncol1 - overlap_position + win_width // 2
-#     elif side == 0:
-#         (list_metric, offset) = _search_overlap(
-#             mat1,
-#             mat2,
-#             win_width,
-#             side=side,
-#             denoise=denoise,
-#             norm=norm,
-#             use_overlap=use_overlap,
-#         )
-#         overlap_position = _calculate_curvature(list_metric)[1]
-#         overlap_position += offset
-#         overlap = overlap_position + win_width // 2
-#     else:
-#         (list_metric1, offset1) = _search_overlap(
-#             mat1,
-#             mat2,
-#             win_width,
-#             side=1,
-#             denoise=denoise,
-#             norm=norm,
-#             use_overlap=use_overlap,
-#         )
-#         (list_metric2, offset2) = _search_overlap(
-#             mat1,
-#             mat2,
-#             win_width,
-#             side=0,
-#             denoise=denoise,
-#             norm=norm,
-#             use_overlap=use_overlap,
-#         )
-
-#         (curvature1, overlap_position1) = _calculate_curvature(list_metric1)
-#         overlap_position1 += offset1
-#         (curvature2, overlap_position2) = _calculate_curvature(list_metric2)
-#         overlap_position2 += offset2
-
-#         if curvature1 > curvature2:
-#             side = 1
-#             overlap_position = overlap_position1
-#             overlap = ncol1 - overlap_position + win_width // 2
-#         else:
-#             side = 0
-#             overlap_position = overlap_position2
-#             overlap = overlap_position + win_width // 2
-
-#     return overlap, side, overlap_position
-
-
-# @nvtx.annotate()
-# def _search_overlap(
-#     mat1, mat2, win_width, side, denoise=True, norm=False, use_overlap=False
-# ):
-#     """
-#     Calculate the correlation metrics between a rectangular region, defined
-#     by the window width, on the utmost left/right side of image 2 and the
-#     same size region in image 1 where the region is slided across image 1.
-
-#     Parameters
-#     ----------
-#     mat1 : array_like
-#         2D array. Projection image or sinogram image.
-#     mat2 : array_like
-#         2D array. Projection image or sinogram image.
-#     win_width : int
-#         Width of the searching window.
-#     side : {0, 1}
-#         Only two options: 0 or 1. It is used to indicate the overlap side
-#         respects to image 1. "0" corresponds to the left side. "1" corresponds
-#         to the right side.
-#     denoise : bool, optional
-#         Apply the Gaussian filter if True.
-#     norm : bool, optional
-#         Apply the normalization if True.
-#     use_overlap : bool, optional
-#         Use the combination of images in the overlap area for calculating
-#         correlation coefficients if True.
-
-#     Returns
-#     -------
-#     list_metric : array_like
-#         1D array. List of the correlation metrics.
-#     offset : int
-#         Initial position of the searching window where the position
-#         corresponds to the center of the window.
-#     """
-#     if denoise is True:
-#         # note: the filtering makes the output contiguous
-#         with nvtx.annotate("denoise_filter", color="green"):
-#             mat1 = gaussian_filter(mat1, (2, 2), mode="reflect")
-#             mat2 = gaussian_filter(mat2, (2, 2), mode="reflect")
-#     else:
-#         mat1 = xp.ascontiguousarray(mat1, dtype=xp.float32)
-#         mat2 = xp.ascontiguousarray(mat2, dtype=xp.float32)
-
-#     (nrow1, ncol1) = mat1.shape
-#     (nrow2, ncol2) = mat2.shape
-
-#     if nrow1 != nrow2:
-#         raise ValueError("Two images are not at the same height!!!")
-
-#     win_width = int(np.clip(win_width, 6, min(ncol1, ncol2) // 2 - 1))
-#     offset = win_width // 2
-#     win_width = 2 * offset  # Make it even
-
-#     list_metric = _calc_metrics(mat1, mat2, win_width, side, use_overlap, norm)
-
-#     min_metric = xp.min(list_metric)
-#     if min_metric != 0.0:
-#         list_metric /= min_metric
-
-#     return list_metric, offset
-
-
-# _calc_metrics_module = load_cuda_module(
-#     "calc_metrics",
-#     name_expressions=[
-#         "calc_metrics_kernel<false, false>",
-#         "calc_metrics_kernel<false, true>",
-#         "calc_metrics_kernel<true, false>",
-#         "calc_metrics_kernel<true, true>",
-#     ],
-#     options=("--maxrregcount=32",),
-# )
-
-
-# @nvtx.annotate()
-# def _calc_metrics(mat1, mat2, win_width, side, use_overlap, norm):
-#     assert mat1.dtype == xp.float32, "only float32 supported"
-#     assert mat2.dtype == xp.float32, "only float32 supported"
-#     assert mat1.shape[0] == mat2.shape[0]
-#     assert mat1.flags.c_contiguous, "only contiguos arrays supported"
-#     assert mat2.flags.c_contiguous, "only contiguos arrays supported"
-
-#     num_pos = mat1.shape[1] - win_width
-#     list_metric = xp.empty(num_pos, dtype=xp.float32)
-
-#     args = (
-#         mat1,
-#         np.int32(mat1.strides[0] / mat1.strides[1]),
-#         mat2,
-#         np.int32(mat2.strides[0] / mat2.strides[1]),
-#         np.int32(win_width),
-#         np.int32(mat1.shape[0]),
-#         np.int32(side),
-#         list_metric,
-#     )
-#     block = (128, 1, 1)
-#     grid = (1, np.int32(num_pos), 1)
-#     smem = block[0] * 4 * 6 if use_overlap else block[0] * 4 * 3
-#     bool2str = lambda x: "true" if x is True else "false"
-#     calc_metrics = _calc_metrics_module.get_function(
-#         f"calc_metrics_kernel<{bool2str(norm)}, {bool2str(use_overlap)}>"
-#     )
-#     calc_metrics(grid=grid, block=block, args=args, shared_mem=smem)
-
-#     return list_metric
-
-
-# @nvtx.annotate()
-# def _calculate_curvature(list_metric):
-#     """
-#     Calculate the curvature of a fitted curve going through the minimum
-#     value of a metric list.
-
-#     Parameters
-#     ----------
-#     list_metric : array_like
-#         1D array. List of metrics.
-
-#     Returns
-#     -------
-#     curvature : float
-#         Quadratic coefficient of the parabola fitting.
-#     min_pos : float
-#         Position of the minimum value with sub-pixel accuracy.
-#     """
-#     radi = 2
-#     num_metric = list_metric.size
-#     min_metric_idx = int(xp.argmin(list_metric))
-#     min_pos = int(np.clip(min_metric_idx, radi, num_metric - radi - 1))
-
-#     # work mostly on CPU here - we have very small arrays here
-#     list1 = xp.asnumpy(list_metric[min_pos - radi : min_pos + radi + 1])
-#     afact1 = np.polyfit(np.arange(0, 2 * radi + 1), list1, 2)[0]
-#     list2 = xp.asnumpy(list_metric[min_pos - 1 : min_pos + 2])
-#     (afact2, bfact2, _) = np.polyfit(np.arange(min_pos - 1, min_pos + 2), list2, 2)
-
-#     curvature = np.abs(afact1)
-#     if afact2 != 0.0:
-#         num = -bfact2 / (2 * afact2)
-#         if (num >= min_pos - 1) and (num <= min_pos + 1):
-#             min_pos = num
-
-#     return curvature, np.float32(min_pos)
-
-
-# # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-
-# ## %%%%%%%%%%%%%%%%%%%%%%find_center_pc%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# @nvtx.annotate()
-# def find_center_pc(
-#     proj1: xp.ndarray, proj2: xp.ndarray, tol: float = 0.5, rotc_guess: Union[float, Optional[str]] = None
-# ) -> float:
-#     """Find rotation axis location by finding the offset between the first
-#     projection and a mirrored projection 180 degrees apart using
-#     phase correlation in Fourier space.
-#     The `phase_cross_correlation` function uses cross-correlation in Fourier
-#     space, optionally employing an upsampled matrix-multiplication DFT to
-#     achieve arbitrary subpixel precision. :cite:`Guizar:08`.
-
-#     Args:
-#         proj1 (xp.ndarray): Projection from the 0th degree
-#         proj2 (xp.ndarray): Projection from the 180th degree
-#         tol (float, optional): Subpixel accuracy. Defaults to 0.5.
-#         rotc_guess (float, optional): Initial guess value for the rotation center. Defaults to None.
-
-#     Returns:
-#         float: Rotation axis location.
-#     """
-#     if xp.__name__ == "cupy":
-#         from cupyx.scipy.ndimage import shift
-#         try:
-#             from cucim.skimage.registration import phase_cross_correlation
-#         except ImportError:
-#             print(
-#                 "Cucim library of Rapidsai is a required dependency for find_center_pc module, please install"
-#             )
-#     else:
-#         from skimage.registration import phase_cross_correlation
-#         from scipy.ndimage import shift
-
-#     imgshift = 0.0 if rotc_guess is None else rotc_guess - (proj1.shape[1] - 1.0) / 2.0
-
-#     proj1 = shift(proj1, [0, -imgshift], mode="constant", cval=0)
-#     proj2 = shift(proj2, [0, -imgshift], mode="constant", cval=0)
-
-#     # create reflection of second projection
-#     proj2 = xp.fliplr(proj2)
-
-#     # using cucim of rapids to do phase cross correlation between two images
-#     shiftr = phase_cross_correlation(
-#         reference_image=proj1, moving_image=proj2, upsample_factor=1.0 / tol
-#     )
-
-#     # Compute center of rotation as the center of first image and the
-#     # registered translation with the second image
-#     center = (proj1.shape[1] + shiftr[0][1] - 1.0) / 2.0
-
-#     return center + imgshift
-
-# ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
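Review note (not part of the change itself): the additions below replace the commented-out block above, and every public method in this diff follows the same CPU-safe dispatch pattern. A minimal sketch of that pattern, with illustrative names that are not in the library:

    def public_method(data):
        # run the GPU implementation only when CuPy imported and a device is visible
        if cupywrapper.cupy_run:
            return __private_gpu_impl(data)
        print("public_method won't be executed because CuPy is not installed")
        return data  # benign placeholder so pipelines can still be composed

    @nvtx.annotate()  # NVTX ranges now live only on the private GPU path
    def __private_gpu_impl(data):
        ...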
+def find_center_360(
+    data: cp.ndarray,
+    ind: Optional[int] = None,
+    win_width: int = 10,
+    side: Optional[Literal[0, 1]] = None,
+    denoise: bool = True,
+    norm: bool = False,
+    use_overlap: bool = False,
+) -> Tuple[float, float, Optional[Literal[0, 1]], float]:
+    """
+    Find the center-of-rotation (COR) in a 360-degree scan with offset COR,
+    using the method presented in Ref. [1] by Nghia Vo.
+
+    This function requires CuPy; if CuPy is not installed, the computation
+    is skipped and a placeholder result is returned.
+
+    Parameters
+    ----------
+    data : cp.ndarray
+        3D tomographic data as a CuPy array.
+    ind : int, optional
+        Index of the slice to be used for estimating the CoR and the overlap.
+    win_width : int, optional
+        Window width used for finding the overlap area.
+    side : {None, 0, 1}, optional
+        Overlap side. Only three options: None, 0, or 1. "None" corresponds
+        to fully automated determination. "0" corresponds to the left side.
+        "1" corresponds to the right side.
+    denoise : bool, optional
+        Apply the Gaussian filter if True.
+    norm : bool, optional
+        Apply the normalisation if True.
+    use_overlap : bool, optional
+        Use the combination of images in the overlap area for calculating
+        correlation coefficients if True.
+
+    Returns
+    -------
+    cor : float
+        Center-of-rotation.
+    overlap : float
+        Width of the overlap area between two halves of the sinogram.
+    side : int
+        Overlap side between two halves of the sinogram.
+    overlap_position : float
+        Position of the window in the first image giving the best
+        correlation metric.
+
+    References
+    ----------
+    [1] : https://doi.org/10.1364/OE.418448
+    """
+
+    if cupywrapper.cupy_run:
+        return __find_center_360(data, ind, win_width, side, denoise, norm, use_overlap)
+    else:
+        print("find_center_360 won't be executed because CuPy is not installed")
+        return (0, 0, 0, 0)
+
+
+@nvtx.annotate()
+def __find_center_360(
+    data: cp.ndarray,
+    ind: Optional[int] = None,
+    win_width: int = 10,
+    side: Optional[Literal[0, 1]] = None,
+    denoise: bool = True,
+    norm: bool = False,
+    use_overlap: bool = False,
+) -> Tuple[float, float, Optional[Literal[0, 1]], float]:
+
+    if data.ndim != 3:
+        raise ValueError("A 3D array must be provided")
+
+    # this method works with a 360-degree sinogram.
+    if ind is None:
+        _sino = data[:, 0, :]
+    else:
+        _sino = data[:, ind, :]
+
+    (nrow, ncol) = _sino.shape
+    nrow_180 = nrow // 2 + 1
+    sino_top = _sino[0:nrow_180, :]
+    sino_bot = cp.fliplr(_sino[-nrow_180:, :])
+    (overlap, side, overlap_position) = _find_overlap(
+        sino_top, sino_bot, win_width, side, denoise, norm, use_overlap
+    )
+    if side == 0:
+        cor = overlap / 2.0 - 1.0
+    else:
+        cor = ncol - overlap / 2.0 - 1.0
+
+    return cor, overlap, side, overlap_position
+
+
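A minimal usage sketch for the re-exported function; the shapes are hypothetical, not taken from the library's tests:

    import cupy as cp
    from httomolibgpu.recon.rotation import find_center_360

    # synthetic 360-degree dataset: (angles, slices, detector-x)
    data = cp.random.random((1800, 3, 2560), dtype=cp.float32)

    # estimate the COR and overlap from slice index 1
    cor, overlap, side, overlap_pos = find_center_360(data, ind=1, win_width=10)
    print(cor, overlap, side, overlap_pos)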
+def _find_overlap(
+    mat1, mat2, win_width, side=None, denoise=True, norm=False, use_overlap=False
+):
+    """
+    Find the overlap area and overlap side between two images (Ref. [1]),
+    where the overlap side refers to the first image.
+
+    Parameters
+    ----------
+    mat1 : array_like
+        2D array. Projection image or sinogram image.
+    mat2 : array_like
+        2D array. Projection image or sinogram image.
+    win_width : int
+        Width of the searching window.
+    side : {None, 0, 1}, optional
+        Only three options: None, 0, or 1. "None" corresponds to fully
+        automated determination. "0" corresponds to the left side. "1"
+        corresponds to the right side.
+    denoise : bool, optional
+        Apply the Gaussian filter if True.
+    norm : bool, optional
+        Apply the normalization if True.
+    use_overlap : bool, optional
+        Use the combination of images in the overlap area for calculating
+        correlation coefficients if True.
+
+    Returns
+    -------
+    overlap : float
+        Width of the overlap area between two images.
+    side : int
+        Overlap side between two images.
+    overlap_position : float
+        Position of the window in the first image giving the best
+        correlation metric.
+
+    """
+    ncol1 = mat1.shape[1]
+    ncol2 = mat2.shape[1]
+    win_width = int(np.clip(win_width, 6, min(ncol1, ncol2) // 2))
+
+    if side == 1:
+        (list_metric, offset) = _search_overlap(
+            mat1,
+            mat2,
+            win_width,
+            side=side,
+            denoise=denoise,
+            norm=norm,
+            use_overlap=use_overlap,
+        )
+        overlap_position = _calculate_curvature(list_metric)[1]
+        overlap_position += offset
+        overlap = ncol1 - overlap_position + win_width // 2
+    elif side == 0:
+        (list_metric, offset) = _search_overlap(
+            mat1,
+            mat2,
+            win_width,
+            side=side,
+            denoise=denoise,
+            norm=norm,
+            use_overlap=use_overlap,
+        )
+        overlap_position = _calculate_curvature(list_metric)[1]
+        overlap_position += offset
+        overlap = overlap_position + win_width // 2
+    else:
+        (list_metric1, offset1) = _search_overlap(
+            mat1,
+            mat2,
+            win_width,
+            side=1,
+            denoise=denoise,
+            norm=norm,
+            use_overlap=use_overlap,
+        )
+        (list_metric2, offset2) = _search_overlap(
+            mat1,
+            mat2,
+            win_width,
+            side=0,
+            denoise=denoise,
+            norm=norm,
+            use_overlap=use_overlap,
+        )
+
+        (curvature1, overlap_position1) = _calculate_curvature(list_metric1)
+        overlap_position1 += offset1
+        (curvature2, overlap_position2) = _calculate_curvature(list_metric2)
+        overlap_position2 += offset2
+
+        # the side with the sharper metric minimum (larger curvature) wins
+        if curvature1 > curvature2:
+            side = 1
+            overlap_position = overlap_position1
+            overlap = ncol1 - overlap_position + win_width // 2
+        else:
+            side = 0
+            overlap_position = overlap_position2
+            overlap = overlap_position + win_width // 2
+
+    return overlap, side, overlap_position
+
+
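A worked example of the overlap geometry above, with hypothetical numbers (in the automatic branch, `_find_overlap` picks whichever side yields the sharper, i.e. higher-curvature, metric minimum):

    # right side (side=1), 2560-column sinogram halves, window width 10:
    ncol1, win_width, overlap_position = 2560, 10, 2400.0
    overlap = ncol1 - overlap_position + win_width // 2  # 165.0 overlapping columns
    # __find_center_360 then converts this overlap into the COR:
    cor = ncol1 - overlap / 2.0 - 1.0                    # 2476.5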
+@nvtx.annotate()
+def _search_overlap(
+    mat1, mat2, win_width, side, denoise=True, norm=False, use_overlap=False
+):
+    """
+    Calculate the correlation metrics between a rectangular region, defined
+    by the window width, on the outermost left/right side of image 2 and a
+    same-sized region in image 1, where the region is slid across image 1.
+
+    Parameters
+    ----------
+    mat1 : array_like
+        2D array. Projection image or sinogram image.
+    mat2 : array_like
+        2D array. Projection image or sinogram image.
+    win_width : int
+        Width of the searching window.
+    side : {0, 1}
+        Only two options: 0 or 1. It is used to indicate the overlap side
+        with respect to image 1. "0" corresponds to the left side. "1"
+        corresponds to the right side.
+    denoise : bool, optional
+        Apply the Gaussian filter if True.
+    norm : bool, optional
+        Apply the normalization if True.
+    use_overlap : bool, optional
+        Use the combination of images in the overlap area for calculating
+        correlation coefficients if True.
+
+    Returns
+    -------
+    list_metric : array_like
+        1D array. List of the correlation metrics.
+    offset : int
+        Initial position of the searching window where the position
+        corresponds to the center of the window.
+    """
+    from cupyx.scipy.ndimage import gaussian_filter
+
+    if denoise is True:
+        # note: the filtering makes the output contiguous
+        with nvtx.annotate("denoise_filter", color="green"):
+            mat1 = gaussian_filter(mat1, (2, 2), mode="reflect")
+            mat2 = gaussian_filter(mat2, (2, 2), mode="reflect")
+    else:
+        mat1 = cp.ascontiguousarray(mat1, dtype=cp.float32)
+        mat2 = cp.ascontiguousarray(mat2, dtype=cp.float32)
+
+    (nrow1, ncol1) = mat1.shape
+    (nrow2, ncol2) = mat2.shape
+
+    if nrow1 != nrow2:
+        raise ValueError("Two images are not at the same height")
+
+    win_width = int(np.clip(win_width, 6, min(ncol1, ncol2) // 2 - 1))
+    offset = win_width // 2
+    win_width = 2 * offset  # Make it even
+
+    list_metric = _calc_metrics(mat1, mat2, win_width, side, use_overlap, norm)
+
+    min_metric = cp.min(list_metric)
+    if min_metric != 0.0:
+        list_metric /= min_metric
+
+    return list_metric, offset
+
+
+@nvtx.annotate()
+def _calc_metrics(mat1, mat2, win_width, side, use_overlap, norm):
+    assert mat1.dtype == cp.float32, "only float32 supported"
+    assert mat2.dtype == cp.float32, "only float32 supported"
+    assert mat1.shape[0] == mat2.shape[0]
+    assert mat1.flags.c_contiguous, "only contiguous arrays supported"
+    assert mat2.flags.c_contiguous, "only contiguous arrays supported"
+
+    from httomolibgpu.cuda_kernels import load_cuda_module
+
+    _calc_metrics_module = load_cuda_module(
+        "calc_metrics",
+        name_expressions=[
+            "calc_metrics_kernel<false, false>",
+            "calc_metrics_kernel<false, true>",
+            "calc_metrics_kernel<true, false>",
+            "calc_metrics_kernel<true, true>",
+        ],
+        options=("--maxrregcount=32",),
+    )
+
+    num_pos = mat1.shape[1] - win_width
+    list_metric = cp.empty(num_pos, dtype=cp.float32)
+
+    args = (
+        mat1,
+        np.int32(mat1.strides[0] / mat1.strides[1]),
+        mat2,
+        np.int32(mat2.strides[0] / mat2.strides[1]),
+        np.int32(win_width),
+        np.int32(mat1.shape[0]),
+        np.int32(side),
+        list_metric,
+    )
+    block = (128, 1, 1)
+    grid = (1, np.int32(num_pos), 1)
+    smem = block[0] * 4 * 6 if use_overlap else block[0] * 4 * 3
+    bool2str = lambda x: "true" if x is True else "false"
+    calc_metrics = _calc_metrics_module.get_function(
+        f"calc_metrics_kernel<{bool2str(norm)}, {bool2str(use_overlap)}>"
+    )
+    calc_metrics(grid=grid, block=block, args=args, shared_mem=smem)
+
+    return list_metric
+
+
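Spelling out the launch-configuration arithmetic above with hypothetical sizes (float32 is 4 bytes); the runtime kernel name is assembled from the two template booleans, which is why all four instantiations must be listed in `name_expressions`:

    num_pos = 2560 - 10       # one CUDA block per candidate window position
    block = (128, 1, 1)       # 128 threads reduce one window position
    use_overlap = True
    smem = 128 * 4 * (6 if use_overlap else 3)  # 3072 vs 1536 bytes of shared memory
    # norm=True, use_overlap=True selects "calc_metrics_kernel<true, true>"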
+@nvtx.annotate()
+def _calculate_curvature(list_metric):
+    """
+    Calculate the curvature of a fitted curve going through the minimum
+    value of a metric list.
+
+    Parameters
+    ----------
+    list_metric : array_like
+        1D array. List of metrics.
+
+    Returns
+    -------
+    curvature : float
+        Quadratic coefficient of the parabola fitting.
+    min_pos : float
+        Position of the minimum value with sub-pixel accuracy.
+    """
+    radi = 2
+    num_metric = list_metric.size
+    min_metric_idx = int(cp.argmin(list_metric))
+    min_pos = int(np.clip(min_metric_idx, radi, num_metric - radi - 1))
+
+    # work mostly on CPU here - we have very small arrays here
+    list1 = cp.asnumpy(list_metric[min_pos - radi : min_pos + radi + 1])
+    afact1 = np.polyfit(np.arange(0, 2 * radi + 1), list1, 2)[0]
+    list2 = cp.asnumpy(list_metric[min_pos - 1 : min_pos + 2])
+    (afact2, bfact2, _) = np.polyfit(np.arange(min_pos - 1, min_pos + 2), list2, 2)
+
+    curvature = np.abs(afact1)
+    if afact2 != 0.0:
+        num = -bfact2 / (2 * afact2)  # vertex of the local parabola fit
+        if (num >= min_pos - 1) and (num <= min_pos + 1):
+            min_pos = num
+
+    return curvature, np.float32(min_pos)
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+## %%%%%%%%%%%%%%%%%%%%%%find_center_pc%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+def find_center_pc(
+    proj1: cp.ndarray,
+    proj2: cp.ndarray,
+    tol: float = 0.5,
+    rotc_guess: Optional[float] = None,
+) -> float:
+    """Find rotation axis location by finding the offset between the first
+    projection and a mirrored projection 180 degrees apart using
+    phase correlation in Fourier space.
+    The `phase_cross_correlation` function uses cross-correlation in Fourier
+    space, optionally employing an upsampled matrix-multiplication DFT to
+    achieve arbitrary subpixel precision. :cite:`Guizar:08`.
+
+    Args:
+        proj1 (cp.ndarray): Projection at 0 degrees.
+        proj2 (cp.ndarray): Projection at 180 degrees.
+        tol (float, optional): Subpixel accuracy. Defaults to 0.5.
+        rotc_guess (float, optional): Initial guess value for the rotation center. Defaults to None.
+
+    Returns:
+        float: Rotation axis location.
+    """
+    if cupywrapper.cupy_run:
+        return __find_center_pc(proj1, proj2, tol, rotc_guess)
+    else:
+        print("find_center_pc won't be executed because CuPy is not installed")
+        return 0
+
+
+@nvtx.annotate()
+def __find_center_pc(
+    proj1: cp.ndarray,
+    proj2: cp.ndarray,
+    tol: float = 0.5,
+    rotc_guess: Optional[float] = None,
+) -> float:
+
+    from cupyx.scipy.ndimage import shift
+    from cucim.skimage.registration import phase_cross_correlation
+
+    imgshift = 0.0 if rotc_guess is None else rotc_guess - (proj1.shape[1] - 1.0) / 2.0
+
+    proj1 = shift(proj1, [0, -imgshift], mode="constant", cval=0)
+    proj2 = shift(proj2, [0, -imgshift], mode="constant", cval=0)
+
+    # create reflection of second projection
+    proj2 = cp.fliplr(proj2)
+
+    # use cuCIM (RAPIDS) to compute the phase cross-correlation between the two images
+    shiftr = phase_cross_correlation(
+        reference_image=proj1, moving_image=proj2, upsample_factor=1.0 / tol
+    )
+
+    # Compute the center of rotation from the center of the first image and the
+    # registered translation with the second image
+    center = (proj1.shape[1] + shiftr[0][1] - 1.0) / 2.0
+
+    return center + imgshift
+
+
+##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
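A usage sketch for the phase-correlation variant; projection shapes are hypothetical, and the GPU path additionally requires cuCIM (imported above from cucim.skimage.registration):

    import cupy as cp
    from httomolibgpu.recon.rotation import find_center_pc

    proj1 = cp.random.random((2160, 2560), dtype=cp.float32)  # 0-degree projection
    proj2 = cp.random.random((2160, 2560), dtype=cp.float32)  # 180-degree projection

    center = find_center_pc(proj1, proj2, tol=0.5)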