diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 6d26270b..c9716b72 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -283 +284 diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp index 7ef87391..f94a06d1 100644 --- a/reg-lib/cuda/CudaMeasure.cpp +++ b/reg-lib/cuda/CudaMeasure.cpp @@ -32,19 +32,28 @@ void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con, F3dContent * // TODO Implement symmetric scheme for CUDA measure types reg_measure_gpu& measureGpu = dynamic_cast(measure); CudaF3dContent& cudaCon = dynamic_cast(con); + CudaF3dContent *cudaConBw = dynamic_cast(conBw); measureGpu.InitialiseMeasure(cudaCon.Content::GetReference(), + cudaCon.GetReferenceCuda(), cudaCon.Content::GetFloating(), + cudaCon.GetFloatingCuda(), cudaCon.Content::GetReferenceMask(), + cudaCon.GetReferenceMaskCuda(), cudaCon.GetActiveVoxelNumber(), cudaCon.Content::GetWarped(), + cudaCon.GetWarpedCuda(), cudaCon.F3dContent::GetWarpedGradient(), + cudaCon.GetWarpedGradientCuda(), cudaCon.F3dContent::GetVoxelBasedMeasureGradient(), + cudaCon.GetVoxelBasedMeasureGradientCuda(), cudaCon.F3dContent::GetLocalWeightSim(), - cudaCon.GetReferenceCuda(), - cudaCon.GetFloatingCuda(), - cudaCon.GetReferenceMaskCuda(), - cudaCon.GetWarpedCuda(), - cudaCon.GetWarpedGradientCuda(), - cudaCon.GetVoxelBasedMeasureGradientCuda()); + cudaConBw ? cudaConBw->Content::GetReferenceMask() : nullptr, + cudaConBw ? cudaConBw->GetReferenceMaskCuda() : nullptr, + cudaConBw ? cudaConBw->Content::GetWarped() : nullptr, + cudaConBw ? cudaConBw->GetWarpedCuda() : nullptr, + cudaConBw ? cudaConBw->F3dContent::GetWarpedGradient() : nullptr, + cudaConBw ? cudaConBw->GetWarpedGradientCuda() : nullptr, + cudaConBw ? cudaConBw->F3dContent::GetVoxelBasedMeasureGradient() : nullptr, + cudaConBw ? cudaConBw->GetVoxelBasedMeasureGradientCuda() : nullptr); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index f6c9615f..d91c39d6 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -22,19 +22,63 @@ class reg_measure_gpu { virtual ~reg_measure_gpu() {} virtual void InitialiseMeasure(nifti_image *refImg, + cudaArray *refImgCuda, nifti_image *floImg, + cudaArray *floImgCuda, int *refMask, + int *refMaskCuda, size_t activeVoxNum, nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, - nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) = 0; + nifti_image *voxelBasedGrad, + float4 *voxelBasedGradCuda, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + int *floMaskCuda = nullptr, + nifti_image *warpedImgBw = nullptr, + float *warpedImgBwCuda = nullptr, + nifti_image *warpedGradBw = nullptr, + float4 *warpedGradBwCuda = nullptr, + nifti_image *voxelBasedGradBw = nullptr, + float4 *voxelBasedGradBwCuda = nullptr) { + // Check that the input image are of type float + if (refImg->datatype != NIFTI_TYPE_FLOAT32 || warpedImg->datatype != NIFTI_TYPE_FLOAT32) { + reg_print_fct_error("reg_measure_gpu::InitialiseMeasure"); + reg_print_msg_error("Only single precision is supported on the GPU"); + reg_exit(); + } + // Bind the required pointers + this->referenceImageCuda = refImgCuda; + this->floatingImageCuda = floImgCuda; + this->referenceMaskCuda = refMaskCuda; + this->activeVoxelNumber = activeVoxNum; + this->warpedImageCuda = warpedImgCuda; + this->warpedGradientCuda = warpedGradCuda; + this->voxelBasedGradientCuda = voxelBasedGradCuda; + // Check if the symmetric mode is used + if (floMask != nullptr && warpedImgBw != nullptr && warpedGradBw != nullptr && voxelBasedGradBw != nullptr && + floMaskCuda != nullptr && warpedImgBwCuda != nullptr && warpedGradBwCuda != nullptr && voxelBasedGradBwCuda != nullptr) { + if (floImg->datatype != NIFTI_TYPE_FLOAT32 || warpedImgBw->datatype != NIFTI_TYPE_FLOAT32) { + reg_print_fct_error("reg_measure_gpu::InitialiseMeasure"); + reg_print_msg_error("Only single precision is supported on the GPU"); + reg_exit(); + } + this->floatingMaskCuda = floMaskCuda; + this->warpedImageBwCuda = warpedImgBwCuda; + this->warpedGradientBwCuda = warpedGradBwCuda; + this->voxelBasedGradientBwCuda = voxelBasedGradBwCuda; + } else { + this->floatingMaskCuda = nullptr; + this->warpedImageBwCuda = nullptr; + this->warpedGradientBwCuda = nullptr; + this->voxelBasedGradientBwCuda = nullptr; + } +#ifndef NDEBUG + reg_print_msg_debug("reg_measure_gpu::InitialiseMeasure() called"); +#endif + } protected: cudaArray *referenceImageCuda; @@ -44,6 +88,11 @@ class reg_measure_gpu { float *warpedImageCuda; float4 *warpedGradientCuda; float4 *voxelBasedGradientCuda; + + int *floatingMaskCuda; + float *warpedImageBwCuda; + float4 *warpedGradientBwCuda; + float4 *voxelBasedGradientBwCuda; }; /* *************************************************************** */ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { @@ -57,19 +106,27 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { virtual ~reg_lncc_gpu() {} virtual void InitialiseMeasure(nifti_image *refImg, + cudaArray *refImgCuda, nifti_image *floImg, + cudaArray *floImgCuda, int *refMask, + int *refMaskCuda, size_t activeVoxNum, nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, - nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) override {} + nifti_image *voxelBasedGrad, + float4 *voxelBasedGradCuda, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + int *floMaskCuda = nullptr, + nifti_image *warpedImgBw = nullptr, + float *warpedImgBwCuda = nullptr, + nifti_image *warpedGradBw = nullptr, + float4 *warpedGradBwCuda = nullptr, + nifti_image *voxelBasedGradBw = nullptr, + float4 *voxelBasedGradBwCuda = nullptr) override {} /// @brief Returns the lncc value virtual double GetSimilarityMeasureValue() override { return 0; } /// @brief Compute the voxel based lncc gradient @@ -80,26 +137,35 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu { public: /// @brief reg_kld_gpu class constructor reg_kld_gpu() { - fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH KLD YET\n"); + reg_print_fct_error("reg_kld_gpu::reg_kld_gpu"); + reg_print_msg_error("CUDA CANNOT BE USED WITH KLD YET"); reg_exit(); } /// @brief reg_kld_gpu class destructor virtual ~reg_kld_gpu() {} virtual void InitialiseMeasure(nifti_image *refImg, + cudaArray *refImgCuda, nifti_image *floImg, + cudaArray *floImgCuda, int *refMask, + int *refMaskCuda, size_t activeVoxNum, nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, - nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) override {} + nifti_image *voxelBasedGrad, + float4 *voxelBasedGradCuda, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + int *floMaskCuda = nullptr, + nifti_image *warpedImgBw = nullptr, + float *warpedImgBwCuda = nullptr, + nifti_image *warpedGradBw = nullptr, + float4 *warpedGradBwCuda = nullptr, + nifti_image *voxelBasedGradBw = nullptr, + float4 *voxelBasedGradBwCuda = nullptr) override {} /// @brief Returns the kld value virtual double GetSimilarityMeasureValue() override { return 0; } /// @brief Compute the voxel based kld gradient @@ -110,26 +176,35 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu { public: /// @brief reg_dti_gpu class constructor reg_dti_gpu() { - fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH DTI YET\n"); + reg_print_fct_error("reg_dti_gpu::reg_dti_gpu"); + reg_print_msg_error("CUDA CANNOT BE USED WITH DTI YET"); reg_exit(); } /// @brief reg_dti_gpu class destructor virtual ~reg_dti_gpu() {} virtual void InitialiseMeasure(nifti_image *refImg, + cudaArray *refImgCuda, nifti_image *floImg, + cudaArray *floImgCuda, int *refMask, + int *refMaskCuda, size_t activeVoxNum, nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, - nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) override {} + nifti_image *voxelBasedGrad, + float4 *voxelBasedGradCuda, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + int *floMaskCuda = nullptr, + nifti_image *warpedImgBw = nullptr, + float *warpedImgBwCuda = nullptr, + nifti_image *warpedGradBw = nullptr, + float4 *warpedGradBwCuda = nullptr, + nifti_image *voxelBasedGradBw = nullptr, + float4 *voxelBasedGradBwCuda = nullptr) override {} /// @brief Returns the dti value virtual double GetSimilarityMeasureValue() override { return 0; } /// @brief Compute the voxel based dti gradient diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 1f5c1997..5efd0391 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -10,129 +10,101 @@ * */ -#include "_reg_nmi.h" #include "_reg_nmi_gpu.h" #include "_reg_nmi_kernels.cu" +#include /* *************************************************************** */ reg_nmi_gpu::reg_nmi_gpu(): reg_nmi::reg_nmi() { - this->forwardJointHistogramLog_device = nullptr; - // this->backwardJointHistogramLog_device=nullptr; #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu constructor called\n"); + reg_print_msg_debug("reg_nmi_gpu constructor called"); #endif } /* *************************************************************** */ reg_nmi_gpu::~reg_nmi_gpu() { - this->DeallocateHistogram(); -#ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu destructor called\n"); -#endif -} -/* *************************************************************** */ -void reg_nmi_gpu::DeallocateHistogram() { - if (this->forwardJointHistogramLog_device != nullptr) { - cudaFree(this->forwardJointHistogramLog_device); - this->forwardJointHistogramLog_device = nullptr; - } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu::DeallocateHistogram() called\n"); + reg_print_msg_debug("reg_nmi_gpu destructor called"); #endif } /* *************************************************************** */ -void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, - nifti_image *floImg, - int *refMask, +void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, + nifti_image *floImg, cudaArray *floImgCuda, + int *refMask, int *refMaskCuda, size_t activeVoxNum, - nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, + nifti_image *warpedImg, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, + nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda, nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, - float *warpedImgCuda, - float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) { + int *floMask, int *floMaskCuda, + nifti_image *warpedImgBw, float *warpedImgBwCuda, + nifti_image *warpedGradBw, float4 *warpedGradBwCuda, + nifti_image *voxelBasedGradBw, float4 *voxelBasedGradBwCuda) { this->DeallocateHistogram(); - reg_nmi::InitialiseMeasure(refImg, - floImg, - refMask, - warpedImg, - warpedGrad, - voxelBasedGrad); - // Check if a symmetric measure is required - if (this->isSymmetric) { - fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr, "[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n"); - reg_exit(); - } + reg_nmi::InitialiseMeasure(refImg, floImg, refMask, warpedImg, warpedGrad, voxelBasedGrad, + localWeightSim, floMask, warpedImgBw, warpedGradBw, voxelBasedGradBw); + reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum, warpedImg, warpedImgCuda, + warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, floMask, floMaskCuda, + warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda); // Check if the input images have multiple timepoints if (this->referenceTimePoint > 1 || this->floatingImage->nt > 1) { - fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr, "[NiftyReg ERROR] Multiple timepoints are not yet supported on the GPU\n"); + reg_print_fct_error("reg_nmi_gpu::InitialiseMeasure"); + reg_print_msg_error("Multiple timepoints are not yet supported"); reg_exit(); } - // Check that the input image are of type float - if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 || - this->warpedImage->datatype != NIFTI_TYPE_FLOAT32) { - fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr, "[NiftyReg ERROR] Only single precision is supported on the GPU\n"); - reg_exit(); - } - // Bind the required pointers - this->referenceImageCuda = refImgCuda; - this->floatingImageCuda = floImgCuda; - this->referenceMaskCuda = refMaskCuda; - this->activeVoxelNumber = activeVoxNum; - this->warpedImageCuda = warpedImgCuda; - this->warpedGradientCuda = warpedGradCuda; - this->voxelBasedGradientCuda = voxelBasedGradCuda; // The reference and floating images have to be updated on the device - if (cudaCommon_transferNiftiToArrayOnDevice(this->referenceImageCuda, this->referenceImage)) { - fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - printf("[NiftyReg ERROR] Error when transferring the reference image.\n"); - reg_exit(); - } - if (cudaCommon_transferNiftiToArrayOnDevice(this->floatingImageCuda, this->floatingImage)) { - fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - printf("[NiftyReg ERROR] Error when transferring the floating image.\n"); + if (cudaCommon_transferNiftiToArrayOnDevice(this->referenceImageCuda, this->referenceImage) || + cudaCommon_transferNiftiToArrayOnDevice(this->floatingImageCuda, this->floatingImage)) { + reg_print_fct_error("reg_nmi_gpu::InitialiseMeasure"); + reg_print_msg_error("Error when transferring the reference or floating image"); reg_exit(); } - // Allocate the required joint histogram on the GPU - cudaMalloc(&this->forwardJointHistogramLog_device, this->totalBinNumber[0] * sizeof(float)); - #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu::InitialiseMeasure called\n"); + reg_print_msg_debug("reg_nmi_gpu::InitialiseMeasure called"); #endif } /* *************************************************************** */ double reg_nmi_gpu::GetSimilarityMeasureValue() { // The NMI computation is performed into the host for now // The relevant images have to be transferred from the device to the host - NR_CUDA_SAFE_CALL(cudaMemcpy(this->warpedImage->data, - this->warpedImageCuda, - this->warpedImage->nvox * - this->warpedImage->nbyper, - cudaMemcpyDeviceToHost)); - + cudaCommon_transferFromDeviceToNifti(this->warpedImage, this->warpedImageCuda); reg_getNMIValue(this->referenceImage, this->warpedImage, this->timePointWeight, this->referenceBinNumber, this->floatingBinNumber, this->totalBinNumber, - this->forwardJointHistogramLog, - this->forwardJointHistogramPro, - this->forwardEntropyValues, + this->jointHistogramLog, + this->jointHistogramPro, + this->entropyValues, this->referenceMask); - const double nmi_value = (this->forwardEntropyValues[0][0] + this->forwardEntropyValues[0][1]) / this->forwardEntropyValues[0][2]; + if (this->isSymmetric) { + cudaCommon_transferFromDeviceToNifti(this->warpedImageBw, this->warpedImageBwCuda); + reg_getNMIValue(this->floatingImage, + this->warpedImageBw, + this->timePointWeight, + this->floatingBinNumber, + this->referenceBinNumber, + this->totalBinNumber, + this->jointHistogramLogBw, + this->jointHistogramProBw, + this->entropyValuesBw, + this->floatingMask); + } + + double nmiFw = 0, nmiBw = 0; + for (int t = 0; t < this->referenceTimePoint; ++t) { + if (this->timePointWeight[t] > 0) { + nmiFw += timePointWeight[t] * (this->entropyValues[t][0] + this->entropyValues[t][1]) / this->entropyValues[t][2]; + if (this->isSymmetric) + nmiBw += timePointWeight[t] * (this->entropyValuesBw[t][0] + this->entropyValuesBw[t][1]) / this->entropyValuesBw[t][2]; + } + } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu::GetSimilarityMeasureValue called\n"); + reg_print_msg_debug("reg_nmi_gpu::GetSimilarityMeasureValue called"); #endif - return nmi_value; + return nmiFw + nmiBw; } /* *************************************************************** */ /// Called when we only have one target and one source image @@ -190,30 +162,46 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage, } /* *************************************************************** */ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { + // Check if the specified time point exists and is active + reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); + if (this->timePointWeight[currentTimepoint] == 0) + return; + + // Call compute similarity measure to calculate joint histogram + this->GetSimilarityMeasureValue(); + // The latest joint histogram is transferred onto the GPU - float *temp = (float*)malloc(this->totalBinNumber[0] * sizeof(float)); - for (unsigned short i = 0; i < this->totalBinNumber[0]; ++i) - temp[i] = static_cast(this->forwardJointHistogramLog[0][i]); - cudaMemcpy(this->forwardJointHistogramLog_device, - temp, - this->totalBinNumber[0] * sizeof(float), - cudaMemcpyHostToDevice); - free(temp); + thrust::device_vector jointHistogramLogCuda(this->jointHistogramLog[0], this->jointHistogramLog[0] + this->totalBinNumber[0]); // The gradient of the NMI is computed on the GPU reg_getVoxelBasedNMIGradient_gpu(this->referenceImage, this->referenceImageCuda, this->warpedImageCuda, this->warpedGradientCuda, - this->forwardJointHistogramLog_device, + jointHistogramLogCuda.data().get(), this->voxelBasedGradientCuda, this->referenceMaskCuda, this->activeVoxelNumber, - this->forwardEntropyValues[0], + this->entropyValues[0], this->referenceBinNumber[0], this->floatingBinNumber[0]); + + if (this->isSymmetric) { + thrust::device_vector jointHistogramLogCudaBw(this->jointHistogramLogBw[0], this->jointHistogramLogBw[0] + this->totalBinNumber[0]); + reg_getVoxelBasedNMIGradient_gpu(this->floatingImage, + this->floatingImageCuda, + this->warpedImageBwCuda, + this->warpedGradientBwCuda, + jointHistogramLogCudaBw.data().get(), + this->voxelBasedGradientBwCuda, + this->floatingMaskCuda, + this->activeVoxelNumber, + this->entropyValuesBw[0], + this->floatingBinNumber[0], + this->referenceBinNumber[0]); + } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called\n"); + reg_print_msg_debug("reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called\n"); #endif } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index ea3da371..ff24a676 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -19,57 +19,68 @@ /// @brief NMI measure of similarity class - GPU based class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu { public: - /// @brief reg_nmi class constructor + /// @brief reg_nmi_gpu class constructor reg_nmi_gpu(); - /// @brief reg_nmi class destructor + /// @brief reg_nmi_gpu class destructor virtual ~reg_nmi_gpu(); /// @brief Initialise the reg_nmi_gpu object virtual void InitialiseMeasure(nifti_image *refImg, + cudaArray *refImgCuda, nifti_image *floImg, + cudaArray *floImgCuda, int *refMask, + int *refMaskCuda, size_t activeVoxNum, nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, - nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) override; + nifti_image *voxelBasedGrad, + float4 *voxelBasedGradCuda, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + int *floMaskCuda = nullptr, + nifti_image *warpedImgBw = nullptr, + float *warpedImgBwCuda = nullptr, + nifti_image *warpedGradBw = nullptr, + float4 *warpedGradBwCuda = nullptr, + nifti_image *voxelBasedGradBw = nullptr, + float4 *voxelBasedGradBwCuda = nullptr) override; /// @brief Returns the nmi value virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based nmi gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; - -protected: - float *forwardJointHistogramLog_device; - // float **backwardJointHistogramLog_device; - void DeallocateHistogram(); }; /* *************************************************************** */ /// @brief NMI measure of similarity class class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_gpu { public: void InitialiseMeasure(nifti_image *refImg, + cudaArray *refImgCuda, nifti_image *floImg, + cudaArray *floImgCuda, int *refMask, + int *refMaskCuda, size_t activeVoxNum, nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, - nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) override {} - /// @brief reg_nmi class constructor + nifti_image *voxelBasedGrad, + float4 *voxelBasedGradCuda, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + int *floMaskCuda = nullptr, + nifti_image *warpedImgBw = nullptr, + float *warpedImgBwCuda = nullptr, + nifti_image *warpedGradBw = nullptr, + float4 *warpedGradBwCuda = nullptr, + nifti_image *voxelBasedGradBw = nullptr, + float4 *voxelBasedGradBwCuda = nullptr) override {} + /// @brief reg_multichannel_nmi_gpu class constructor reg_multichannel_nmi_gpu() {} - /// @brief reg_nmi class destructor + /// @brief reg_multichannel_nmi_gpu class destructor virtual ~reg_multichannel_nmi_gpu() {} /// @brief Returns the nmi value virtual double GetSimilarityMeasureValue() override { return 0; } diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 275fc7ef..58a3fcb8 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -16,58 +16,45 @@ /* *************************************************************** */ reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() { #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_ssd_gpu constructor called\n"); + reg_print_msg_debug("reg_ssd_gpu constructor called"); #endif } /* *************************************************************** */ -void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, - nifti_image *floImg, - int *refMask, +reg_ssd_gpu::~reg_ssd_gpu() { +#ifndef NDEBUG + reg_print_msg_debug("reg_ssd_gpu destructor called"); +#endif +} +/* *************************************************************** */ +void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, + nifti_image *floImg, cudaArray *floImgCuda, + int *refMask, int *refMaskCuda, size_t activeVoxNum, - nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, + nifti_image *warpedImg, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, + nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda, nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, - float *warpedImgCuda, - float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) { - reg_ssd::InitialiseMeasure(refImg, - floImg, - refMask, - warpedImg, - warpedGrad, - voxelBasedGrad, - localWeightSim); + int *floMask, int *floMaskCuda, + nifti_image *warpedImgBw, float *warpedImgBwCuda, + nifti_image *warpedGradBw, float4 *warpedGradBwCuda, + nifti_image *voxelBasedGradBw, float4 *voxelBasedGradBwCuda) { + reg_ssd::InitialiseMeasure(refImg, floImg, refMask, warpedImg, warpedGrad, voxelBasedGrad, + localWeightSim, floMask, warpedImgBw, warpedGradBw, voxelBasedGradBw); + reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum, warpedImg, warpedImgCuda, + warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, floMask, floMaskCuda, + warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda); // Check if a symmetric measure is required if (this->isSymmetric) { - fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr, "[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n"); - reg_exit(); - } - // Check that the input image are of type float - if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 || - this->warpedImage->datatype != NIFTI_TYPE_FLOAT32) { - fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr, "[NiftyReg ERROR] The input images are expected to be float\n"); + reg_print_fct_error("reg_ssd_gpu::InitialiseMeasure"); + reg_print_msg_error("Symmetric scheme is not yet supported"); reg_exit(); } // Check that the input images have only one time point if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1) { - fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr, "[NiftyReg ERROR] Both input images should have only one time point\n"); + reg_print_fct_error("reg_ssd_gpu::InitialiseMeasure"); + reg_print_msg_error("Multiple timepoints are not yet supported"); reg_exit(); } - // Bind the required pointers - this->referenceImageCuda = refImgCuda; - this->floatingImageCuda = floImgCuda; - this->referenceMaskCuda = refMaskCuda; - this->activeVoxelNumber = activeVoxNum; - this->warpedImageCuda = warpedImgCuda; - this->warpedGradientCuda = warpedGradCuda; - this->voxelBasedGradientCuda = voxelBasedGradCuda; #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_ssd_gpu::InitialiseMeasure()\n"); #endif diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index c95d4064..34764df3 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -23,23 +23,31 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu { /// @brief reg_ssd class constructor reg_ssd_gpu(); /// @brief Measure class destructor - virtual ~reg_ssd_gpu() {} + virtual ~reg_ssd_gpu(); /// @brief Initialise the reg_ssd object virtual void InitialiseMeasure(nifti_image *refImg, + cudaArray *refImgCuda, nifti_image *floImg, + cudaArray *floImgCuda, int *refMask, + int *refMaskCuda, size_t activeVoxNum, nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, - nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) override; + nifti_image *voxelBasedGrad, + float4 *voxelBasedGradCuda, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + int *floMaskCuda = nullptr, + nifti_image *warpedImgBw = nullptr, + float *warpedImgBwCuda = nullptr, + nifti_image *warpedGradBw = nullptr, + float4 *warpedGradBwCuda = nullptr, + nifti_image *voxelBasedGradBw = nullptr, + float4 *voxelBasedGradBwCuda = nullptr) override; /// @brief Returns the ssd value virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based ssd gradient