Skip to content

Commit

Permalink
Bugfix
Browse files Browse the repository at this point in the history
- Fixed systems 85054 and 92966, and fixed Rader kernel printing for multiplication Rader without a LUT
  • Loading branch information
DTolm committed Sep 13, 2022
1 parent e6292c3 commit b15cb0c
Showing 1 changed file with 117 additions and 43 deletions.
160 changes: 117 additions & 43 deletions vkFFT/vkFFT.h
Original file line number Diff line number Diff line change
Expand Up @@ -1697,28 +1697,61 @@ static inline VkFFTResult appendConstantsVkFFT(VkFFTSpecializationConstantsLayou
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
#endif
for (uint64_t j = 0; j < (sc->raderContainer[i].prime - 1); j++) {//fix later
if (!strcmp(floatType, "double")) {
double* raderFFTKernel = (double*)sc->raderContainer[i].raderFFTkernel;
sc->tempLen = sprintf(sc->tempStr, "%.17e%s ", raderFFTKernel[2 * j] / (sc->raderContainer[i].prime - 1), LFending);
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
if (!strcmp(floatType, "float")) {
float* raderFFTKernel = (float*)sc->raderContainer[i].raderFFTkernel;
sc->tempLen = sprintf(sc->tempStr, "%.8e%s ", raderFFTKernel[2 * j] / (sc->raderContainer[i].prime - 1), LFending);
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
if (j < (sc->raderContainer[i].prime - 2)) {
sc->tempLen = sprintf(sc->tempStr, ", ");
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
if (sc->raderContainer[i].type == 0) {
for (uint64_t j = 0; j < (sc->raderContainer[i].prime - 1); j++) {//fix later
if (!strcmp(floatType, "double")) {
double* raderFFTKernel = (double*)sc->raderContainer[i].raderFFTkernel;
sc->tempLen = sprintf(sc->tempStr, "%.17e%s ", raderFFTKernel[2 * j] / (sc->raderContainer[i].prime - 1), LFending);
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
if (!strcmp(floatType, "float")) {
float* raderFFTKernel = (float*)sc->raderContainer[i].raderFFTkernel;
sc->tempLen = sprintf(sc->tempStr, "%.8e%s ", raderFFTKernel[2 * j] / (sc->raderContainer[i].prime - 1), LFending);
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
if (j < (sc->raderContainer[i].prime - 2)) {
sc->tempLen = sprintf(sc->tempStr, ", ");
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
else {
sc->tempLen = sprintf(sc->tempStr, "};\n");
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
}
else {
sc->tempLen = sprintf(sc->tempStr, "};\n");
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
else {
long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
for (uint64_t j = 0; j < (sc->raderContainer[i].prime - 1); j++) {//fix later
uint64_t g_pow = 1;
for (uint64_t t = 0; t < sc->raderContainer[i].prime - 1 - j; t++) {
g_pow = (g_pow * sc->raderContainer[i].generator) % sc->raderContainer[i].prime;
}
if (!strcmp(floatType, "double")) {
double* raderFFTKernel = (double*)sc->raderContainer[i].raderFFTkernel;
sc->tempLen = sprintf(sc->tempStr, "%.17e%s ", (double)cos(2.0 * g_pow * double_PI / sc->raderContainer[i].prime), LFending);
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
if (!strcmp(floatType, "float")) {
float* raderFFTKernel = (float*)sc->raderContainer[i].raderFFTkernel;
sc->tempLen = sprintf(sc->tempStr, "%.8e%s ", (float)cos(2.0 * g_pow * double_PI / sc->raderContainer[i].prime), LFending);
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
if (j < (sc->raderContainer[i].prime - 2)) {
sc->tempLen = sprintf(sc->tempStr, ", ");
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
else {
sc->tempLen = sprintf(sc->tempStr, "};\n");
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
}
}
#if((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
Expand All @@ -1730,29 +1763,62 @@ static inline VkFFTResult appendConstantsVkFFT(VkFFTSpecializationConstantsLayou
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
#endif
for (uint64_t j = 0; j < (sc->raderContainer[i].prime - 1); j++) {//fix later
if (!strcmp(floatType, "double")) {
double* raderFFTKernel = (double*)sc->raderContainer[i].raderFFTkernel;
sc->tempLen = sprintf(sc->tempStr, "%.17e%s ", raderFFTKernel[2 * j + 1] / (sc->raderContainer[i].prime - 1), LFending);
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
if (!strcmp(floatType, "float")) {
float* raderFFTKernel = (float*)sc->raderContainer[i].raderFFTkernel;
sc->tempLen = sprintf(sc->tempStr, "%.8e%s ", raderFFTKernel[2 * j + 1] / (sc->raderContainer[i].prime - 1), LFending);
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
if (sc->raderContainer[i].type == 0) {
for (uint64_t j = 0; j < (sc->raderContainer[i].prime - 1); j++) {//fix later
if (!strcmp(floatType, "double")) {
double* raderFFTKernel = (double*)sc->raderContainer[i].raderFFTkernel;
sc->tempLen = sprintf(sc->tempStr, "%.17e%s ", raderFFTKernel[2 * j + 1] / (sc->raderContainer[i].prime - 1), LFending);
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
if (!strcmp(floatType, "float")) {
float* raderFFTKernel = (float*)sc->raderContainer[i].raderFFTkernel;
sc->tempLen = sprintf(sc->tempStr, "%.8e%s ", raderFFTKernel[2 * j + 1] / (sc->raderContainer[i].prime - 1), LFending);
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}

if (j < (sc->raderContainer[i].prime - 2)) {
sc->tempLen = sprintf(sc->tempStr, ", ");
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
if (j < (sc->raderContainer[i].prime - 2)) {
sc->tempLen = sprintf(sc->tempStr, ", ");
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
else {
sc->tempLen = sprintf(sc->tempStr, "};\n");
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
}
else {
sc->tempLen = sprintf(sc->tempStr, "};\n");
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
else {
long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
for (uint64_t j = 0; j < (sc->raderContainer[i].prime - 1); j++) {//fix later
uint64_t g_pow = 1;
for (uint64_t t = 0; t < sc->raderContainer[i].prime - 1 - j; t++) {
g_pow = (g_pow * sc->raderContainer[i].generator) % sc->raderContainer[i].prime;
}
if (!strcmp(floatType, "double")) {
double* raderFFTKernel = (double*)sc->raderContainer[i].raderFFTkernel;
sc->tempLen = sprintf(sc->tempStr, "%.17e%s ", (double)(-sin(2.0 * g_pow * double_PI / sc->raderContainer[i].prime)), LFending);
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
if (!strcmp(floatType, "float")) {
float* raderFFTKernel = (float*)sc->raderContainer[i].raderFFTkernel;
sc->tempLen = sprintf(sc->tempStr, "%.8e%s ", (float)(-sin(2.0 * g_pow * double_PI / sc->raderContainer[i].prime)), LFending);
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
if (j < (sc->raderContainer[i].prime - 2)) {
sc->tempLen = sprintf(sc->tempStr, ", ");
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
else {
sc->tempLen = sprintf(sc->tempStr, "};\n");
res = VkAppendLine(sc);
if (res != VKFFT_SUCCESS) return res;
}
}
}
}
Expand Down Expand Up @@ -36234,7 +36300,9 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
axis->axisBlock[1] = final_rader_thread_count;
if (axis->groupedBatch * axis->axisBlock[1] > maxThreadNum) axis->groupedBatch = maxBatchCoalesced;
}

if (axis->specializationConstants.useRaderFFT) {
if (axis->axisBlock[1] < axis->specializationConstants.minRaderFFTThreadNum) axis->axisBlock[1] = axis->specializationConstants.minRaderFFTThreadNum;
}
uint64_t scale = app->configuration.aimThreads / axis->axisBlock[1] / axis->groupedBatch;
if (scale > 1) axis->groupedBatch *= scale;
axis->axisBlock[0] = (axis->specializationConstants.stageStartSize > axis->groupedBatch) ? axis->groupedBatch : axis->specializationConstants.stageStartSize;
Expand Down Expand Up @@ -36276,6 +36344,9 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
axis->axisBlock[1] = final_rader_thread_count;
if (axis->groupedBatch * axis->axisBlock[1] > maxThreadNum) axis->groupedBatch = maxBatchCoalesced;
}
if (axis->specializationConstants.useRaderFFT) {
if (axis->axisBlock[1] < axis->specializationConstants.minRaderFFTThreadNum) axis->axisBlock[1] = axis->specializationConstants.minRaderFFTThreadNum;
}
axis->axisBlock[0] = (FFTPlan->actualFFTSizePerAxis[axis_id][0] > axis->groupedBatch) ? axis->groupedBatch : FFTPlan->actualFFTSizePerAxis[axis_id][0];
if (axis->axisBlock[0] > app->configuration.maxComputeWorkGroupSize[0]) axis->axisBlock[0] = app->configuration.maxComputeWorkGroupSize[0];
if (axis->axisBlock[0] * axis->axisBlock[1] > maxThreadNum) {
Expand Down Expand Up @@ -36313,6 +36384,9 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
axis->axisBlock[1] = final_rader_thread_count;
if (axis->groupedBatch * axis->axisBlock[1] > maxThreadNum) axis->groupedBatch = maxBatchCoalesced;
}
if (axis->specializationConstants.useRaderFFT) {
if (axis->axisBlock[1] < axis->specializationConstants.minRaderFFTThreadNum) axis->axisBlock[1] = axis->specializationConstants.minRaderFFTThreadNum;
}
axis->axisBlock[0] = (FFTPlan->actualFFTSizePerAxis[axis_id][0] > axis->groupedBatch) ? axis->groupedBatch : FFTPlan->actualFFTSizePerAxis[axis_id][0];

if (axis->axisBlock[0] > app->configuration.maxComputeWorkGroupSize[0]) axis->axisBlock[0] = app->configuration.maxComputeWorkGroupSize[0];
Expand Down

0 comments on commit b15cb0c

Please sign in to comment.