diff --git a/conda/recipe/meta.yaml b/conda/recipe/meta.yaml index cb35e294..98625e81 100644 --- a/conda/recipe/meta.yaml +++ b/conda/recipe/meta.yaml @@ -42,6 +42,7 @@ test: - httomolibgpu - httomolibgpu.misc - httomolibgpu.prep + - httomolibgpu.recon source_files: - tests/* commands: diff --git a/httomolibgpu/__init__.py b/httomolibgpu/__init__.py index 994c904f..016c8d77 100644 --- a/httomolibgpu/__init__.py +++ b/httomolibgpu/__init__.py @@ -1,9 +1,9 @@ from httomolibgpu.misc.corr import * from httomolibgpu.misc.morph import * from httomolibgpu.misc.rescale import * -#from httomolibgpu.prep.alignment import * -#from httomolibgpu.prep.normalize import * -#from httomolibgpu.prep.phase import * -#from httomolibgpu.prep.stripe import * -#from httomolibgpu.recon.algorithm import * -#from httomolibgpu.recon.rotation import * +from httomolibgpu.prep.alignment import * +from httomolibgpu.prep.normalize import * +from httomolibgpu.prep.phase import * +from httomolibgpu.prep.stripe import * +from httomolibgpu.recon.algorithm import * +from httomolibgpu.recon.rotation import * diff --git a/httomolibgpu/recon/__init__.py b/httomolibgpu/recon/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/httomolibgpu/recon/algorithm.py b/httomolibgpu/recon/algorithm.py new file mode 100644 index 00000000..4daedc91 --- /dev/null +++ b/httomolibgpu/recon/algorithm.py @@ -0,0 +1,292 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# --------------------------------------------------------------------------- +# Copyright 2022 Diamond Light Source Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ---------------------------------------------------------------------------
+# Created By : Tomography Team at DLS
+# Changes relative to ToMoBAR 2024.01 version
+# ---------------------------------------------------------------------------
+"""Module for tomographic reconstruction"""
+
+import numpy as xp
+import numpy as np
+
+cupy_run = False
+try:
+    import cupy as xp
+
+    try:
+        xp.cuda.Device(0).compute_capability
+        cupy_run = True
+
+    except xp.cuda.runtime.CUDARuntimeError:
+        print("CuPy library is a major dependency for HTTomolibgpu, please install")
+        import numpy as np
+except ImportError:
+    import numpy as np
+
+import nvtx
+from numpy import float32, complex64
+from typing import Optional, Tuple, Union
+from typing import Type
+
+from tomobar.methodsDIR_CuPy import RecToolsDIRCuPy
+from tomobar.methodsIR_CuPy import RecToolsIRCuPy
+
+__all__ = [
+    "FBP",
+    "SIRT",
+    "CGLS",
+]
+
+input_data_axis_labels = ["angles", "detY", "detX"]  # set the labels of the input data
+
+
+## %%%%%%%%%%%%%%%%%%%%%%% FBP reconstruction %%%%%%%%%%%%%%%%%%%%%%%%%%%% ##
+@nvtx.annotate()
+def FBP(
+    data: xp.ndarray,
+    angles: np.ndarray,
+    center: Optional[float] = None,
+    filter_freq_cutoff: Optional[float] = 0.6,
+    recon_size: Optional[int] = None,
+    recon_mask_radius: Optional[float] = None,
+    gpu_id: int = 0,
+) -> xp.ndarray:
+    """
+    Perform Filtered Backprojection (FBP) reconstruction using ASTRA toolbox and ToMoBAR wrappers.
+    This is a 3D recon from a CuPy array and a custom-built filter.
+
+    Parameters
+    ----------
+    data : cp.ndarray
+        Projection data as a CuPy array.
+    angles : np.ndarray
+        An array of angles given in radians.
+    center : float, optional
+        The center of rotation (CoR).
+    filter_freq_cutoff : float, optional
+        Cutoff frequency parameter for the sinc filter; lower values produce a sharper but noisier reconstruction.
+    recon_size : int, optional
+        The [recon_size, recon_size] shape of the reconstructed slice in pixels.
+        By default (None), the reconstructed size will be the dimension of the horizontal detector.
+    recon_mask_radius: float, optional
+        The radius of the circular mask that applies to the reconstructed slice in order to crop
+        out some undesirable artefacts. The values outside the diameter will be set to zero.
+        None by default, to see the effect of the mask try setting the value in the range [0.7-1.0].
+    gpu_id : int, optional
+        A GPU device index to perform operation on.
+
+    Returns
+    -------
+    cp.ndarray
+        The FBP reconstructed volume as a CuPy array.
+    """
+    RecToolsCP = _instantiate_direct_recon_class(
+        data, angles, center, recon_size, gpu_id
+    )
+
+    reconstruction = RecToolsCP.FBP(
+        data,
+        cutoff_freq=filter_freq_cutoff,
+        recon_mask_radius=recon_mask_radius,
+        data_axes_labels_order=input_data_axis_labels,
+    )
+    xp._default_memory_pool.free_all_blocks()
+    return xp.require(xp.swapaxes(reconstruction, 0, 1), requirements="C")
+
+
+## %%%%%%%%%%%%%%%%%%%%%%% SIRT reconstruction %%%%%%%%%%%%%%%%%%%%%%%%%%%% ##
+@nvtx.annotate()
+def SIRT(
+    data: xp.ndarray,
+    angles: np.ndarray,
+    center: Optional[float] = None,
+    recon_size: Optional[int] = None,
+    iterations: Optional[int] = 300,
+    nonnegativity: Optional[bool] = True,
+    gpu_id: int = 0,
+) -> xp.ndarray:
+    """
+    Perform Simultaneous Iterative Reconstruction Technique (SIRT) using ASTRA toolbox and ToMoBAR wrappers.
+    This is a 3D recon directly from a CuPy array while using ASTRA GPUlink capability.
+
+    Parameters
+    ----------
+    data : cp.ndarray
+        Projection data as a CuPy array.
+    angles : np.ndarray
+        An array of angles given in radians.
+    center : float, optional
+        The center of rotation (CoR).
+    recon_size : int, optional
+        The [recon_size, recon_size] shape of the reconstructed slice in pixels.
+        By default (None), the reconstructed size will be the dimension of the horizontal detector.
+    iterations : int, optional
+        The number of SIRT iterations.
+    nonnegativity : bool, optional
+        Impose nonnegativity constraint on reconstructed image.
+    gpu_id : int, optional
+        A GPU device index to perform operation on.
+
+    Returns
+    -------
+    cp.ndarray
+        The SIRT reconstructed volume as a CuPy array.
+    """
+
+    RecToolsCP = _instantiate_iterative_recon_class(
+        data, angles, center, recon_size, gpu_id, datafidelity="LS"
+    )
+
+    _data_ = {
+        "projection_norm_data": data,
+        "data_axes_labels_order": input_data_axis_labels,
+    }  # data dictionary
+    _algorithm_ = {
+        "iterations": iterations,
+        "nonnegativity": nonnegativity,
+    }
+    reconstruction = RecToolsCP.SIRT(_data_, _algorithm_)
+    xp._default_memory_pool.free_all_blocks()
+    return xp.require(xp.swapaxes(reconstruction, 0, 1), requirements="C")
+
+
+## %%%%%%%%%%%%%%%%%%%%%%% CGLS reconstruction %%%%%%%%%%%%%%%%%%%%%%%%%%%% ##
+@nvtx.annotate()
+def CGLS(
+    data: xp.ndarray,
+    angles: np.ndarray,
+    center: Optional[float] = None,
+    recon_size: Optional[int] = None,
+    iterations: Optional[int] = 20,
+    nonnegativity: Optional[bool] = True,
+    gpu_id: int = 0,
+) -> xp.ndarray:
+    """
+    Perform Conjugate Gradient Least Squares (CGLS) using ASTRA toolbox and ToMoBAR wrappers.
+    This is a 3D recon directly from a CuPy array while using ASTRA GPUlink capability.
+
+    Parameters
+    ----------
+    data : cp.ndarray
+        Projection data as a CuPy array.
+    angles : np.ndarray
+        An array of angles given in radians.
+    center : float, optional
+        The center of rotation (CoR).
+    recon_size : int, optional
+        The [recon_size, recon_size] shape of the reconstructed slice in pixels.
+        By default (None), the reconstructed size will be the dimension of the horizontal detector.
+    iterations : int, optional
+        The number of CGLS iterations.
+    nonnegativity : bool, optional
+        Impose nonnegativity constraint on reconstructed image.
+    gpu_id : int, optional
+        A GPU device index to perform operation on.
+
+    Returns
+    -------
+    cp.ndarray
+        The CGLS reconstructed volume as a CuPy array.
+    """
+    RecToolsCP = _instantiate_iterative_recon_class(
+        data, angles, center, recon_size, gpu_id, datafidelity="LS"
+    )
+
+    _data_ = {
+        "projection_norm_data": data,
+        "data_axes_labels_order": input_data_axis_labels,
+    }  # data dictionary
+    _algorithm_ = {"iterations": iterations, "nonnegativity": nonnegativity}
+    reconstruction = RecToolsCP.CGLS(_data_, _algorithm_)
+    xp._default_memory_pool.free_all_blocks()
+    return xp.require(xp.swapaxes(reconstruction, 0, 1), requirements="C")
+
+
+## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ##
+def _instantiate_direct_recon_class(
+    data: xp.ndarray,
+    angles: np.ndarray,
+    center: Optional[float] = None,
+    recon_size: Optional[int] = None,
+    gpu_id: int = 0,
+) -> type[RecToolsDIRCuPy]:
+    """instantiate ToMoBAR's direct recon class
+
+    Args:
+        data (cp.ndarray): data array
+        angles (np.ndarray): angles
+        center (Optional[float], optional): center of recon. Defaults to None.
+        recon_size (Optional[int], optional): recon_size. Defaults to None.
+        gpu_id (int, optional): gpu ID. Defaults to 0.
+ + Returns: + type[RecToolsDIRCuPy]: an instance of the direct recon class + """ + if center is None: + center = data.shape[2] // 2 # making a crude guess + if recon_size is None: + recon_size = data.shape[2] + RecToolsCP = RecToolsDIRCuPy( + DetectorsDimH=data.shape[2], # Horizontal detector dimension + DetectorsDimV=data.shape[1], # Vertical detector dimension (3D case) + CenterRotOffset=data.shape[2] / 2 + - center + - 0.5, # Center of Rotation scalar or a vector + AnglesVec=-angles, # A vector of projection angles in radians + ObjSize=recon_size, # Reconstructed object dimensions (scalar) + device_projector=gpu_id, + ) + return RecToolsCP + + +def _instantiate_iterative_recon_class( + data: xp.ndarray, + angles: np.ndarray, + center: Optional[float] = None, + recon_size: Optional[int] = None, + gpu_id: int = 0, + datafidelity: str = "LS", +) -> type[RecToolsIRCuPy]: + """instantiate ToMoBAR's iterative recon class + + Args: + data (cp.ndarray): data array + angles (np.ndarray): angles + center (Optional[float], optional): center of recon. Defaults to None. + recon_size (Optional[int], optional): recon_size. Defaults to None. + datafidelity (str, optional): Data fidelity + gpu_id (int, optional): gpu ID. Defaults to 0. + + Returns: + type[RecToolsIRCuPy]: an instance of the iterative class + """ + if center is None: + center = data.shape[2] // 2 # making a crude guess + if recon_size is None: + recon_size = data.shape[2] + RecToolsCP = RecToolsIRCuPy( + DetectorsDimH=data.shape[2], # Horizontal detector dimension + DetectorsDimV=data.shape[1], # Vertical detector dimension (3D case) + CenterRotOffset=data.shape[2] / 2 + - center + - 0.5, # Center of Rotation scalar or a vector + AnglesVec=-angles, # A vector of projection angles in radians + ObjSize=recon_size, # Reconstructed object dimensions (scalar) + datafidelity=datafidelity, + device_projector=gpu_id, + ) + return RecToolsCP diff --git a/httomolibgpu/recon/rotation.py b/httomolibgpu/recon/rotation.py new file mode 100644 index 00000000..08250e22 --- /dev/null +++ b/httomolibgpu/recon/rotation.py @@ -0,0 +1,753 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# --------------------------------------------------------------------------- +# Copyright 2022 Diamond Light Source Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# --------------------------------------------------------------------------- +# Created By : Tomography Team at DLS +# Created Date: 01 November 2022 +# --------------------------------------------------------------------------- +"""Modules for finding the axis of rotation""" + +import numpy as xp +import numpy as np + +cupy_run = False +try: + import cupy as xp + import cupyx + import cupyx.scipy.ndimage as cpndi + from cupyx.scipy.ndimage import gaussian_filter, shift + + try: + xp.cuda.Device(0).compute_capability + cupy_run = True + + except xp.cuda.runtime.CUDARuntimeError: + print("CuPy library is a major dependency for HTTomolibgpu, please install") + import numpy as np +except ImportError: + import numpy as np + +import nvtx +import math +from typing import List, Literal, Optional, Tuple + +if cupy_run: + from httomolibgpu.cuda_kernels import load_cuda_module + +__all__ = [ + "find_center_vo", + "find_center_360", + "find_center_pc", +] + + +@nvtx.annotate() +def find_center_vo( + data: xp.ndarray, + ind: Optional[int] = None, + smin: int = -50, + smax: int = 50, + srad: float = 6.0, + step: float = 0.25, + ratio: float = 0.5, + drop: int = 20, +) -> float: + """ + Find rotation axis location using Nghia Vo's method. See the paper + https://opg.optica.org/oe/fulltext.cfm?uri=oe-22-16-19078&id=297315 + + Parameters + ---------- + data : cp.ndarray + 3D tomographic data or a 2D sinogram as a CuPy array. + ind : int, optional + Index of the slice to be used to estimate the CoR. + smin : int, optional + Coarse search radius. Reference to the horizontal center of + the sinogram. + smax : int, optional + Coarse search radius. Reference to the horizontal center of + the sinogram. + srad : float, optional + Fine search radius. + step : float, optional + Step of fine searching. + ratio : float, optional + The ratio between the FOV of the camera and the size of object. + It's used to generate the mask. + drop : int, optional + Drop lines around vertical center of the mask. + + Returns + ------- + float + Rotation axis location. + """ + + if data.ndim == 2: + data = xp.expand_dims(data, 1) + ind = 0 + + height = data.shape[1] + + if ind is None: + ind = height // 2 + if height > 10: + _sino = xp.mean(data[:, ind - 5 : ind + 5, :], axis=1) + else: + _sino = data[:, ind, :] + else: + _sino = data[:, ind, :] + + with nvtx.annotate("gaussian_filter_1", color="green"): + _sino_cs = gaussian_filter(_sino, (3, 1), mode="reflect") + with nvtx.annotate("gaussian_filter_2", color="green"): + _sino_fs = gaussian_filter(_sino, (2, 2), mode="reflect") + + if _sino.shape[0] * _sino.shape[1] > 4e6: + # data is large, so downsample it before performing search for + # centre of rotation + _sino_coarse = _downsample(_sino_cs, 2, 1) + init_cen = _search_coarse(_sino_coarse, smin / 4.0, smax / 4.0, ratio, drop) + fine_cen = _search_fine(_sino_fs, srad, step, init_cen * 4.0, ratio, drop) + else: + init_cen = _search_coarse(_sino_cs, smin, smax, ratio, drop) + fine_cen = _search_fine(_sino_fs, srad, step, init_cen, ratio, drop) + + return fine_cen.get() + + +@nvtx.annotate() +def find_center_pc(proj1, proj2, tol=0.5, rotc_guess=None): + """ + Find rotation axis location by finding the offset between the first + projection and a mirrored projection 180 degrees apart using + phase correlation in Fourier space. + The ``phase_cross_correlation`` function uses cross-correlation in Fourier + space, optionally employing an upsampled matrix-multiplication DFT to + achieve arbitrary subpixel precision. :cite:`Guizar:08`. 
+ + Parameters + ---------- + proj1 : cp.ndarray + Projection data + + proj2 : cp.ndarray + Projection data + + tol : scalar, optional + Subpixel accuracy + + rotc_guess : float, optional + Initial guess value for the rotation center + + Returns + ------- + float + Rotation axis location. + """ + from cucim.skimage.registration import phase_cross_correlation + + imgshift = 0.0 if rotc_guess is None else rotc_guess - (proj1.shape[1] - 1.0) / 2.0 + + proj1 = cpndi.shift(proj1, [0, -imgshift], mode="constant", cval=0) + proj2 = cpndi.shift(proj2, [0, -imgshift], mode="constant", cval=0) + + # create reflection of second projection + proj2 = xp.fliplr(proj2) + + # using cucim of rapids to do phase cross correlation between two images + shift = phase_cross_correlation( + reference_image=proj1, moving_image=proj2, upsample_factor=1.0 / tol + ) + + # Compute center of rotation as the center of first image and the + # registered translation with the second image + center = (proj1.shape[1] + shift[0][1] - 1.0) / 2.0 + + return center + imgshift + + +@nvtx.annotate() +def _search_coarse(sino, smin, smax, ratio, drop): + (nrow, ncol) = sino.shape + flip_sino = xp.ascontiguousarray(xp.fliplr(sino)) + comp_sino = xp.ascontiguousarray(xp.flipud(sino)) + mask = _create_mask(2 * nrow, ncol, 0.5 * ratio * ncol, drop) + + cen_fliplr = (ncol - 1.0) / 2.0 + smin_clip_val = max(min(smin + cen_fliplr, ncol - 1), 0) + smin = smin_clip_val - cen_fliplr + smax_clip_val = max(min(smax + cen_fliplr, ncol - 1), 0) + smax = smax_clip_val - cen_fliplr + start_cor = ncol // 2 + smin + stop_cor = ncol // 2 + smax + list_cor = xp.arange(start_cor, stop_cor + 0.5, 0.5, dtype=xp.float32) + list_shift = 2.0 * (list_cor - cen_fliplr) + list_metric = xp.empty(list_shift.shape, dtype=xp.float32) + _calculate_metric(list_shift, sino, flip_sino, comp_sino, mask, list_metric) + + minpos = xp.argmin(list_metric) + if minpos == 0: + print("WARNING!!!Global minimum is out of searching range") + print(f"Please extend smin: {smin}") + if minpos == len(list_metric) - 1: + print("WARNING!!!Global minimum is out of searching range") + print(f"Please extend smax: {smax}") + cor = list_cor[minpos] + return cor + + +@nvtx.annotate() +def _search_fine(sino, srad, step, init_cen, ratio, drop): + (nrow, ncol) = sino.shape + + flip_sino = xp.ascontiguousarray(xp.fliplr(sino)) + comp_sino = xp.ascontiguousarray(xp.flipud(sino)) + mask = _create_mask(2 * nrow, ncol, 0.5 * ratio * ncol, drop) + + cen_fliplr = (ncol - 1.0) / 2.0 + srad = max(min(abs(float(srad)), ncol / 4.0), 1.0) + step = max(min(abs(step), srad), 0.1) + init_cen = max(min(init_cen, ncol - srad - 1), srad) + list_cor = init_cen + xp.arange(-srad, srad + step, step, dtype=np.float32) + list_shift = 2.0 * (list_cor - cen_fliplr) + list_metric = xp.empty(list_shift.shape, dtype="float32") + + _calculate_metric(list_shift, sino, flip_sino, comp_sino, mask, out=list_metric) + cor = list_cor[xp.argmin(list_metric)] + return cor + + +@nvtx.annotate() +def _create_mask(nrow, ncol, radius, drop): + du = 1.0 / ncol + dv = (nrow - 1.0) / (nrow * 2.0 * np.pi) + cen_row = int(math.ceil(nrow / 2.0) - 1) + cen_col = int(math.ceil(ncol / 2.0) - 1) + drop = min([drop, int(math.ceil(0.05 * nrow))]) + + block_x = 128 + block_y = 1 + block_dims = (block_x, block_y) + grid_x = (ncol // 2 + 1 + block_x - 1) // block_x + grid_y = nrow + grid_dims = (grid_x, grid_y) + mask = xp.empty((nrow, ncol // 2 + 1), dtype="uint16") + params = ( + ncol, + nrow, + cen_col, + cen_row, + xp.float32(du), + 
+        xp.float32(dv),
+        xp.float32(radius),
+        xp.float32(drop),
+        mask,
+    )
+    module = load_cuda_module("generate_mask")
+    kernel = module.get_function("generate_mask")
+    kernel(grid_dims, block_dims, params)
+    return mask
+
+
+def round_up(x: float) -> int:
+    if x >= 0.0:
+        return int(math.ceil(x))
+    else:
+        return int(math.floor(x))
+
+
+def _get_available_gpu_memory() -> int:
+    dev = xp.cuda.Device()
+    # first, let's make some space
+    xp.get_default_memory_pool().free_all_blocks()
+    cache = xp.fft.config.get_plan_cache()
+    cache.clear()
+    available_memory = dev.mem_info[0] + xp.get_default_memory_pool().free_bytes()
+    return int(available_memory * 0.9)  # 10% safety margin
+
+
+def _calculate_chunks(
+    nshifts: int, shift_size: int, available_memory: Optional[int] = None
+) -> List[int]:
+    if available_memory is None:
+        available_memory = _get_available_gpu_memory()
+
+    available_memory -= shift_size
+    freq_domain_size = (
+        shift_size  # it needs only half (RFFT), but complex64, so it's the same
+    )
+    fft_plan_size = freq_domain_size
+    size_per_shift = fft_plan_size + freq_domain_size + shift_size
+    nshift_max = available_memory // size_per_shift
+    assert nshift_max > 0, "Not enough memory to process"
+    num_chunks = int(np.ceil(nshifts / nshift_max))
+    chunk_size = int(np.ceil(nshifts / num_chunks))
+    chunks = [chunk_size] * (num_chunks - 1)
+    stop_idx = list(np.cumsum(chunks))
+    stop_idx.append(nshifts)
+    return stop_idx
+
+
+@nvtx.annotate()
+def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out):
+    # this tries to simplify - if shift_col is integer, no need to spline interpolate
+    assert list_shift.dtype == xp.float32, "shifts must be single precision floats"
+    assert sino1.dtype == xp.float32, "sino1 must be float32"
+    assert sino2.dtype == xp.float32, "sino2 must be float32"
+    assert sino3.dtype == xp.float32, "sino3 must be float32"
+    assert out.dtype == xp.float32, "out must be float32"
+    assert sino2.flags["C_CONTIGUOUS"], "sino2 must be C-contiguous"
+    assert sino3.flags["C_CONTIGUOUS"], "sino3 must be C-contiguous"
+    assert list_shift.flags["C_CONTIGUOUS"], "list_shift must be C-contiguous"
+    nshifts = list_shift.shape[0]
+    na1 = sino1.shape[0]
+    na2 = sino2.shape[0]
+
+    module = load_cuda_module("center_360_shifts")
+    shift_whole_shifts = module.get_function("shift_whole_shifts")
+    # note: we don't have to calculate the mean here, as we're only looking for minimum metric.
+    # The sum is enough.
+    masked_sum_abs_kernel = xp.ReductionKernel(
+        in_params="complex64 x, uint16 mask",  # input, complex + mask
+        out_params="float32 out",  # output, real
+        map_expr="mask ? abs(x) : 0.0f",
+        reduce_expr="a + b",
+        post_map_expr="out = a",
+        identity="0.0f",
+        reduce_type="float",
+        name="masked_sum_abs",
+    )
+
+    # determine how many shifts we can fit in the available memory
+    # and iterate in chunks
+    chunks = _calculate_chunks(
+        nshifts, (na1 + na2) * sino2.shape[1] * xp.float32().nbytes
+    )
+
+    mat = xp.empty((chunks[0], na1 + na2, sino2.shape[1]), dtype=xp.float32)
+    mat[:, :na1, :] = sino1
+    # explicitly create FFT plan here, so it's not cached and clearly re-used
+    plan = cupyx.scipy.fftpack.get_fft_plan(
+        mat, mat.shape[-2:], axes=(1, 2), value_type="R2C"
+    )
+
+    for i, stop_idx in enumerate(chunks):
+        if i > 0:
+            # more than one iteration means we're tight on memory, so clear up freed blocks
+            mat_freq = None
+            xp.get_default_memory_pool().free_all_blocks()
+
+        start_idx = 0 if i == 0 else chunks[i - 1]
+        size = stop_idx - start_idx
+
+        # first, handle the integer shifts without spline in a raw kernel,
+        # and shift in the sino3 one accordingly
+        bx = 128
+        gx = (sino3.shape[1] + bx - 1) // bx
+        shift_whole_shifts(
+            grid=(gx, na2, size),
+            block=(bx, 1, 1),
+            args=(
+                sino2,
+                sino3,
+                list_shift[start_idx:stop_idx],
+                mat[:, na1:, :],
+                sino3.shape[1],
+                na1 + na2,
+            ),
+        )
+
+        # now we can only look at the spline shifting, the rest is done
+        list_shift_host = xp.asnumpy(list_shift[start_idx:stop_idx])
+        for i in range(list_shift_host.shape[0]):
+            shift_col = float(list_shift_host[i])
+            if not shift_col.is_integer():
+                shifted = shift(sino2, (0, shift_col), order=3, prefilter=True)
+                shift_int = round_up(shift_col)
+                if shift_int >= 0:
+                    mat[i, na1:, shift_int:] = shifted[:, shift_int:]
+                else:
+                    mat[i, na1:, :shift_int] = shifted[:, :shift_int]
+
+        # stack and transform
+        # (we do the full sized mat FFT, even though the last chunk may be smaller, to
+        # make sure we can re-use the same FFT plan as before)
+        mat_freq = cupyx.scipy.fft.rfft2(mat, axes=(1, 2), norm=None, plan=plan)
+        masked_sum_abs_kernel(
+            mat_freq[:size, :, :], mask, out=out[start_idx:stop_idx], axis=(1, 2)
+        )
+
+
+@nvtx.annotate()
+def _downsample(sino, level, axis):
+    assert sino.dtype == xp.float32, "single precision floating point input required"
+    assert sino.flags["C_CONTIGUOUS"], "sino must be C-contiguous"
+
+    dx, dz = sino.shape
+    # Determine the new size, dim, of the downsampled dimension
+    dim = int(sino.shape[axis] / math.pow(2, level))
+    shape = [dx, dz]
+    shape[axis] = dim
+    downsampled_data = xp.empty(shape, dtype="float32")
+
+    block_x = 8
+    block_y = 8
+    block_dims = (block_x, block_y)
+    grid_x = (sino.shape[1] + block_x - 1) // block_x
+    grid_y = (sino.shape[0] + block_y - 1) // block_y
+    grid_dims = (grid_x, grid_y)
+    # 8x8 thread-block, which means 16 "lots" of columns to downsample per
+    # thread-block; 4 bytes per float, so allocate 16*4 = 64 bytes of shared
+    # memory per thread-block
+    shared_mem_bytes = 64
+    params = (sino, dx, dz, level, downsampled_data)
+    module = load_cuda_module("downsample_sino")
+    kernel = module.get_function("downsample_sino")
+    kernel(grid_dims, block_dims, params, shared_mem=shared_mem_bytes)
+    return downsampled_data
+
+
+# --- Center of rotation (COR) estimation method ---#
+@nvtx.annotate()
+def find_center_360(
+    data: xp.ndarray,
+    ind: Optional[int] = None,
+    win_width: int = 10,
+    side: Optional[Literal[0, 1]] = None,
+    denoise: bool = True,
+    norm: bool = False,
+    use_overlap: bool = False,
+) -> Tuple[float, float, Optional[Literal[0, 1]], float]:
+    """
+    Find the center-of-rotation (COR) in a 360-degree scan with offset COR using
+    the method presented in Ref. [1] by Nghia Vo.
+
+    This function supports both numpy and cupy - the implementation is selected
+    by where the input data array resides.
+
+    Parameters
+    ----------
+    data : cp.ndarray
+        3D tomographic data as a CuPy array.
+    ind : int, optional
+        Index of the slice to be used for estimating the CoR and the overlap.
+    win_width : int, optional
+        Window width used for finding the overlap area.
+    side : {None, 0, 1}, optional
+        Overlap side. Only three options: None, 0, or 1. "None" corresponds
+        to fully automated determination. "0" corresponds to the left side.
+        "1" corresponds to the right side.
+    denoise : bool, optional
+        Apply the Gaussian filter if True.
+    norm : bool, optional
+        Apply the normalisation if True.
+    use_overlap : bool, optional
+        Use the combination of images in the overlap area for calculating
+        correlation coefficients if True.
+
+    Returns
+    -------
+    cor : float
+        Center-of-rotation.
+    overlap : float
+        Width of the overlap area between two halves of the sinogram.
+    side : int
+        Overlap side between two halves of the sinogram.
+    overlap_position : float
+        Position of the window in the first image giving the best
+        correlation metric.
+
+    References
+    ----------
+    [1] : https://doi.org/10.1364/OE.418448
+    """
+    if data.ndim != 3:
+        raise ValueError("A 3D array must be provided")
+
+    # this method works with a 360-degree sinogram.
+    if ind is None:
+        _sino = data[:, 0, :]
+    else:
+        _sino = data[:, ind, :]
+
+    (nrow, ncol) = _sino.shape
+    nrow_180 = nrow // 2 + 1
+    sino_top = _sino[0:nrow_180, :]
+    sino_bot = xp.fliplr(_sino[-nrow_180:, :])
+    (overlap, side, overlap_position) = _find_overlap(
+        sino_top, sino_bot, win_width, side, denoise, norm, use_overlap
+    )
+    if side == 0:
+        cor = overlap / 2.0 - 1.0
+    else:
+        cor = ncol - overlap / 2.0 - 1.0
+
+    return cor, overlap, side, overlap_position
+
+
+def _find_overlap(
+    mat1, mat2, win_width, side=None, denoise=True, norm=False, use_overlap=False
+):
+    """
+    Find the overlap area and overlap side between two images (Ref. [1]) where
+    the overlap side refers to the first image.
+
+    Parameters
+    ----------
+    mat1 : array_like
+        2D array. Projection image or sinogram image.
+    mat2 : array_like
+        2D array. Projection image or sinogram image.
+    win_width : int
+        Width of the searching window.
+    side : {None, 0, 1}, optional
+        Only three options: None, 0, or 1. "None" corresponds to fully
+        automated determination. "0" corresponds to the left side. "1"
+        corresponds to the right side.
+    denoise : bool, optional
+        Apply the Gaussian filter if True.
+    norm : bool, optional
+        Apply the normalization if True.
+    use_overlap : bool, optional
+        Use the combination of images in the overlap area for calculating
+        correlation coefficients if True.
+
+    Returns
+    -------
+    overlap : float
+        Width of the overlap area between two images.
+    side : int
+        Overlap side between two images.
+    overlap_position : float
+        Position of the window in the first image giving the best
+        correlation metric.
+
+    """
+    ncol1 = mat1.shape[1]
+    ncol2 = mat2.shape[1]
+    win_width = int(np.clip(win_width, 6, min(ncol1, ncol2) // 2))
+
+    if side == 1:
+        (list_metric, offset) = _search_overlap(
+            mat1,
+            mat2,
+            win_width,
+            side=side,
+            denoise=denoise,
+            norm=norm,
+            use_overlap=use_overlap,
+        )
+        overlap_position = _calculate_curvature(list_metric)[1]
+        overlap_position += offset
+        overlap = ncol1 - overlap_position + win_width // 2
+    elif side == 0:
+        (list_metric, offset) = _search_overlap(
+            mat1,
+            mat2,
+            win_width,
+            side=side,
+            denoise=denoise,
+            norm=norm,
+            use_overlap=use_overlap,
+        )
+        overlap_position = _calculate_curvature(list_metric)[1]
+        overlap_position += offset
+        overlap = overlap_position + win_width // 2
+    else:
+        (list_metric1, offset1) = _search_overlap(
+            mat1,
+            mat2,
+            win_width,
+            side=1,
+            denoise=denoise,
+            norm=norm,
+            use_overlap=use_overlap,
+        )
+        (list_metric2, offset2) = _search_overlap(
+            mat1,
+            mat2,
+            win_width,
+            side=0,
+            denoise=denoise,
+            norm=norm,
+            use_overlap=use_overlap,
+        )
+
+        (curvature1, overlap_position1) = _calculate_curvature(list_metric1)
+        overlap_position1 += offset1
+        (curvature2, overlap_position2) = _calculate_curvature(list_metric2)
+        overlap_position2 += offset2
+
+        if curvature1 > curvature2:
+            side = 1
+            overlap_position = overlap_position1
+            overlap = ncol1 - overlap_position + win_width // 2
+        else:
+            side = 0
+            overlap_position = overlap_position2
+            overlap = overlap_position + win_width // 2
+
+    return overlap, side, overlap_position
+
+
+@nvtx.annotate()
+def _search_overlap(
+    mat1, mat2, win_width, side, denoise=True, norm=False, use_overlap=False
+):
+    """
+    Calculate the correlation metrics between a rectangular region, defined
+    by the window width, on the outermost left/right side of image 2 and the
+    same-size region in image 1, where the region is slid across image 1.
+
+    Parameters
+    ----------
+    mat1 : array_like
+        2D array. Projection image or sinogram image.
+    mat2 : array_like
+        2D array. Projection image or sinogram image.
+    win_width : int
+        Width of the searching window.
+    side : {0, 1}
+        Only two options: 0 or 1. It is used to indicate the overlap side
+        with respect to image 1. "0" corresponds to the left side. "1"
+        corresponds to the right side.
+    denoise : bool, optional
+        Apply the Gaussian filter if True.
+    norm : bool, optional
+        Apply the normalization if True.
+    use_overlap : bool, optional
+        Use the combination of images in the overlap area for calculating
+        correlation coefficients if True.
+
+    Returns
+    -------
+    list_metric : array_like
+        1D array. List of the correlation metrics.
+    offset : int
+        Initial position of the searching window where the position
+        corresponds to the center of the window.
+    """
+    if denoise is True:
+        # note: the filtering makes the output contiguous
+        with nvtx.annotate("denoise_filter", color="green"):
+            mat1 = cpndi.gaussian_filter(mat1, (2, 2), mode="reflect")
+            mat2 = cpndi.gaussian_filter(mat2, (2, 2), mode="reflect")
+    else:
+        mat1 = xp.ascontiguousarray(mat1, dtype=xp.float32)
+        mat2 = xp.ascontiguousarray(mat2, dtype=xp.float32)
+
+    (nrow1, ncol1) = mat1.shape
+    (nrow2, ncol2) = mat2.shape
+
+    if nrow1 != nrow2:
+        raise ValueError("Two images are not at the same height!!!")
+
+    win_width = int(np.clip(win_width, 6, min(ncol1, ncol2) // 2 - 1))
+    offset = win_width // 2
+    win_width = 2 * offset  # Make it even
+
+    list_metric = _calc_metrics(mat1, mat2, win_width, side, use_overlap, norm)
+
+    min_metric = xp.min(list_metric)
+    if min_metric != 0.0:
+        list_metric /= min_metric
+
+    return list_metric, offset
+
+
+_calc_metrics_module = load_cuda_module(
+    "calc_metrics",
+    name_expressions=[
+        "calc_metrics_kernel<false, false>",
+        "calc_metrics_kernel<false, true>",
+        "calc_metrics_kernel<true, false>",
+        "calc_metrics_kernel<true, true>",
+    ],
+    options=("--maxrregcount=32",),
+)
+
+
+@nvtx.annotate()
+def _calc_metrics(mat1, mat2, win_width, side, use_overlap, norm):
+    assert mat1.dtype == xp.float32, "only float32 supported"
+    assert mat2.dtype == xp.float32, "only float32 supported"
+    assert mat1.shape[0] == mat2.shape[0]
+    assert mat1.flags.c_contiguous, "only contiguous arrays supported"
+    assert mat2.flags.c_contiguous, "only contiguous arrays supported"
+
+    num_pos = mat1.shape[1] - win_width
+    list_metric = xp.empty(num_pos, dtype=xp.float32)
+
+    args = (
+        mat1,
+        np.int32(mat1.strides[0] / mat1.strides[1]),
+        mat2,
+        np.int32(mat2.strides[0] / mat2.strides[1]),
+        np.int32(win_width),
+        np.int32(mat1.shape[0]),
+        np.int32(side),
+        list_metric,
+    )
+    block = (128, 1, 1)
+    grid = (1, np.int32(num_pos), 1)
+    smem = block[0] * 4 * 6 if use_overlap else block[0] * 4 * 3
+    bool2str = lambda x: "true" if x is True else "false"
+    calc_metrics = _calc_metrics_module.get_function(
+        f"calc_metrics_kernel<{bool2str(norm)}, {bool2str(use_overlap)}>"
+    )
+    calc_metrics(grid=grid, block=block, args=args, shared_mem=smem)
+
+    return list_metric
+
+
+@nvtx.annotate()
+def _calculate_curvature(list_metric):
+    """
+    Calculate the curvature of a fitted curve going through the minimum
+    value of a metric list.
+
+    Parameters
+    ----------
+    list_metric : array_like
+        1D array. List of metrics.
+
+    Returns
+    -------
+    curvature : float
+        Quadratic coefficient of the parabola fitting.
+    min_pos : float
+        Position of the minimum value with sub-pixel accuracy.
+ """ + radi = 2 + num_metric = list_metric.size + min_metric_idx = int(xp.argmin(list_metric)) + min_pos = int(np.clip(min_metric_idx, radi, num_metric - radi - 1)) + + # work mostly on CPU here - we have very small arrays here + list1 = xp.asnumpy(list_metric[min_pos - radi : min_pos + radi + 1]) + afact1 = np.polyfit(np.arange(0, 2 * radi + 1), list1, 2)[0] + list2 = xp.asnumpy(list_metric[min_pos - 1 : min_pos + 2]) + (afact2, bfact2, _) = np.polyfit(np.arange(min_pos - 1, min_pos + 2), list2, 2) + + curvature = np.abs(afact1) + if afact2 != 0.0: + num = -bfact2 / (2 * afact2) + if (num >= min_pos - 1) and (num <= min_pos + 1): + min_pos = num + + return curvature, np.float32(min_pos) diff --git a/pyproject.toml b/pyproject.toml index 317818fd..282e66ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,8 @@ build-backend = "setuptools.build_meta" include-package-data = true packages = ["httomolibgpu", "httomolibgpu.misc", - "httomolibgpu.prep", + "httomolibgpu.prep", + "httomolibgpu.recon", "httomolibgpu.cuda_kernels"] [tool.setuptools.package-data]