Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: kd-tree knn example segfault #2925

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
184 changes: 94 additions & 90 deletions ...gorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
#ifndef __KDTREE_KNN_CLASSIFICATION_PREDICT_DENSE_DEFAULT_BATCH_IMPL_I__
#define __KDTREE_KNN_CLASSIFICATION_PREDICT_DENSE_DEFAULT_BATCH_IMPL_I__

#include "src/threading/threading.h"
#include "services/daal_defines.h"
#include "src/threading/threading.h"
#include "src/services/service_utils.h"
#include "algorithms/algorithm.h"
#include "services/daal_atomic_int.h"
Expand All @@ -39,7 +39,7 @@
#include "src/algorithms/k_nearest_neighbors/kdtree_knn_classification_model_impl.h"
#include "src/algorithms/k_nearest_neighbors/kdtree_knn_impl.i"
#include "src/algorithms/k_nearest_neighbors/knn_heap.h"

#include <iostream>
namespace daal
{
namespace algorithms
Expand Down Expand Up @@ -162,132 +162,130 @@ Status KNNClassificationPredictKernel<algorithmFpType, defaultDense, cpu>::compu
{
iSize *= 2;
}
const size_t heapSize = (iSize / 16 + 1) * 16;

const size_t heapSize = (iSize / 16 + 1) * 16;
const size_t xRowCount = x->getNumberOfRows();
const algorithmFpType base = 2.0;
const size_t expectedMaxDepth = (Math::sLog(xRowCount) / Math::sLog(base) + 1) * __KDTREE_DEPTH_MULTIPLICATION_FACTOR;
const size_t stackSize = Math::sPowx(base, Math::sCeil(Math::sLog(expectedMaxDepth) / Math::sLog(base)));
const size_t expectedMaxDepth = (Math::xsLog(xRowCount) / Math::xsLog(base) + 1) * __KDTREE_DEPTH_MULTIPLICATION_FACTOR;
const size_t stackSize = Math::xsPowx(base, Math::xsCeil(Math::xsLog(expectedMaxDepth) / Math::xsLog(base)));
// Scratch state bundled into one allocation; instances are created lazily by the
// daal::tls initializer that follows and handed out via localTLS.local().
struct Local
{
// Neighbor heap; initialized with heapSize and passed to findNearestNeighbors / predict.
MaxHeap heap;
// Search stack; initialized with stackSize and passed to findNearestNeighbors.
SearchStack stack;
};

SafeStatus safeStat;
daal::tls<Local *> localTLS([&]() -> Local * {
Local * const ptr = service_scalable_calloc<Local, cpu>(1);
if (ptr)
{
if (!ptr->heap.init(heapSize))
{
status.add(services::ErrorMemoryAllocationFailed);
safeStat.add(services::ErrorMemoryAllocationFailed);
service_scalable_free<Local, cpu>(ptr);
return nullptr;
}
if (!ptr->stack.init(stackSize))
{
status.add(services::ErrorMemoryAllocationFailed);
safeStat.add(services::ErrorMemoryAllocationFailed);
ptr->heap.clear();
service_scalable_free<Local, cpu>(ptr);
return nullptr;
}
}
else
{
status.add(services::ErrorMemoryAllocationFailed);
safeStat.add(services::ErrorMemoryAllocationFailed);
}
return ptr;
});

DAAL_CHECK_STATUS_OK((status.ok()), status);

const auto maxThreads = threader_get_threads_number();
auto nThreads = (maxThreads < 1) ? 1 : maxThreads;
const size_t xColumnCount = x->getNumberOfColumns();
const auto rowsPerBlock = (xRowCount + maxThreads - 1) / maxThreads;
const auto rowsPerBlock = (xRowCount + nThreads - 1) / nThreads;
const auto blockCount = (xRowCount + rowsPerBlock - 1) / rowsPerBlock;
SafeStatus safeStat;

services::internal::TArrayScalable<algorithmFpType *, cpu> soa_arrays;
bool isHomogenSOA = checkHomogenSOA<algorithmFpType, cpu>(data, soa_arrays);

daal::threader_for(blockCount, blockCount, [&](int iBlock) {
Local * const local = localTLS.local();
if (local)
{
services::Status s;
DAAL_CHECK_MALLOC_THR(local);

const size_t first = iBlock * rowsPerBlock;
const size_t last = min<cpu>(static_cast<decltype(xRowCount)>(first + rowsPerBlock), xRowCount);
const size_t first = iBlock * rowsPerBlock;
const size_t last = min<cpu>(static_cast<decltype(xRowCount)>(first + rowsPerBlock), xRowCount);

const algorithmFpType radius = MaxVal::get();
data_management::BlockDescriptor<algorithmFpType> xBD;
const_cast<NumericTable &>(*x).getBlockOfRows(first, last - first, readOnly, xBD);
const algorithmFpType * const dx = xBD.getBlockPtr();

data_management::BlockDescriptor<int> indicesBD;
data_management::BlockDescriptor<algorithmFpType> distancesBD;
if (indices)
{
s = indices->getBlockOfRows(first, last - first, writeOnly, indicesBD);
DAAL_CHECK_STATUS_THR(s);
}
if (distances)
{
s = distances->getBlockOfRows(first, last - first, writeOnly, distancesBD);
DAAL_CHECK_STATUS_THR(s);
}
const algorithmFpType radius = MaxVal::get();
data_management::BlockDescriptor<algorithmFpType> xBD;
const_cast<NumericTable &>(*x).getBlockOfRows(first, last - first, readOnly, xBD);
const algorithmFpType * const dx = xBD.getBlockPtr();

if (labels)
data_management::BlockDescriptor<int> indicesBD;
data_management::BlockDescriptor<algorithmFpType> distancesBD;
if (indices)
{
DAAL_CHECK_STATUS_THR(indices->getBlockOfRows(first, last - first, writeOnly, indicesBD));
}
if (distances)
{
DAAL_CHECK_STATUS_THR(distances->getBlockOfRows(first, last - first, writeOnly, distancesBD));
}
if (labels)
{
const size_t yColumnCount = y->getNumberOfColumns();
std::cout << "here labels -1" << std::endl;
data_management::BlockDescriptor<algorithmFpType> yBD;
std::cout << "here labels 0" << std::endl;
y->getBlockOfRows(first, last - first, writeOnly, yBD);
auto * const dy = yBD.getBlockPtr();
std::cout << "here labels 1" << std::endl;
for (size_t i = 0; i < last - first; ++i)
{
const size_t yColumnCount = y->getNumberOfColumns();
data_management::BlockDescriptor<algorithmFpType> yBD;
y->getBlockOfRows(first, last - first, writeOnly, yBD);
auto * const dy = yBD.getBlockPtr();

for (size_t i = 0; i < last - first; ++i)
{
findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data,
isHomogenSOA, soa_arrays);
s = predict(&(dy[i * yColumnCount]), local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses);
DAAL_CHECK_STATUS_THR(s)
}

s |= y->releaseBlockOfRows(yBD);
DAAL_CHECK_STATUS_THR(s);
findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, isHomogenSOA,
soa_arrays);
DAAL_CHECK_STATUS_THR(
predict(&dy[i * yColumnCount], local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses));
}
else
std::cout << "here labels 2" << std::endl;
y->releaseBlockOfRows(yBD);
std::cout << "here labels 3" << std::endl;
}
else
{
for (size_t i = 0; i < last - first; ++i)
{
for (size_t i = 0; i < last - first; ++i)
{
findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data,
isHomogenSOA, soa_arrays);
s = predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses);
DAAL_CHECK_STATUS_THR(s)
}
findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, isHomogenSOA,
soa_arrays);
DAAL_CHECK_STATUS_THR(predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses));
}
}

if (indices)
{
s |= indices->releaseBlockOfRows(indicesBD);
}
DAAL_CHECK_STATUS_THR(s);
if (distances)
{
s |= distances->releaseBlockOfRows(distancesBD);
}
DAAL_CHECK_STATUS_THR(s);
if (indices)
{
DAAL_CHECK_STATUS_THR(indices->releaseBlockOfRows(indicesBD));
}

const_cast<NumericTable &>(*x).releaseBlockOfRows(xBD);
if (distances)
{
DAAL_CHECK_STATUS_THR(distances->releaseBlockOfRows(distancesBD));
}
const_cast<NumericTable &>(*x).releaseBlockOfRows(xBD);
});

DAAL_CHECK_SAFE_STATUS()

std::cout << "here final 1" << std::endl;
status = safeStat.detach();
std::cout << "here final 2" << std::endl;
if (!status) return status;
std::cout << "here final 3" << std::endl;
localTLS.reduce([&](Local * ptr) -> void {
if (ptr)
{
ptr->stack.clear();
std::cout << "here final 4" << std::endl;
ptr->heap.clear();
std::cout << "here final 5" << std::endl;
service_scalable_free<Local, cpu>(ptr);
std::cout << "here final 6" << std::endl;
}
});
return status;
Expand Down Expand Up @@ -438,22 +436,23 @@ services::Status KNNClassificationPredictKernel<algorithmFpType, defaultDense, c
data_management::BlockDescriptor<algorithmFpType> & distances, size_t index, const size_t nClasses)
{
typedef daal::internal::MathInst<algorithmFpType, cpu> Math;

std::cout << "here debug1" << std::endl;
const size_t heapSize = heap.size();
if (heapSize < 1) return services::Status();

std::cout << "here debug2" << std::endl;
if (indices.getNumberOfRows() != 0)
{
std::cout << "here debug3" << std::endl;
DAAL_ASSERT(modelIndices);

services::Status s;
data_management::BlockDescriptor<int> modelIndicesBD;

std::cout << "here debug4" << std::endl;
const auto nIndices = indices.getNumberOfColumns();
DAAL_ASSERT(heapSize <= nIndices);

int * const indicesPtr = indices.getBlockPtr() + index * nIndices;

std::cout << "here debug5" << std::endl;
for (size_t i = 0; i < heapSize; ++i)
{
s |= const_cast<NumericTable *>(modelIndices)->getBlockOfRows(heap[i].index, 1, readOnly, modelIndicesBD);
Expand All @@ -464,51 +463,53 @@ services::Status KNNClassificationPredictKernel<algorithmFpType, defaultDense, c
s |= const_cast<NumericTable *>(modelIndices)->releaseBlockOfRows(modelIndicesBD);
DAAL_ASSERT(s.ok());
}
std::cout << "here debug6" << std::endl;
}

if (distances.getNumberOfRows() != 0)
{
services::Status s;

std::cout << "here debug7" << std::endl;
const auto nDistances = distances.getNumberOfColumns();
DAAL_ASSERT(heapSize <= nDistances);

std::cout << "here debug8" << std::endl;
algorithmFpType * const distancesPtr = distances.getBlockPtr() + index * nDistances;
for (size_t i = 0; i < heapSize; ++i)
{
distancesPtr[i] = heap[i].distance;
}

Math::vSqrt(heapSize, distancesPtr, distancesPtr);

std::cout << "here debug9" << std::endl;
Math::xvSqrt(heapSize, distancesPtr, distancesPtr);
std::cout << "here debug10" << std::endl;
for (size_t i = heapSize; i < nDistances; ++i)
{
distancesPtr[i] = -1;
}
std::cout << "here debug11" << std::endl;
}

if (labels)
{
DAAL_ASSERT(predictedClass);

std::cout << "here debug12" << std::endl;
data_management::BlockDescriptor<algorithmFpType> labelBD;
algorithmFpType * classes = static_cast<algorithmFpType *>(daal::services::internal::service_malloc<algorithmFpType, cpu>(heapSize));
algorithmFpType * classWeights = static_cast<algorithmFpType *>(daal::services::internal::service_malloc<algorithmFpType, cpu>(nClasses));
DAAL_CHECK_MALLOC(classWeights);
DAAL_CHECK_MALLOC(classes);

std::cout << "here debug13" << std::endl;
for (size_t i = 0; i < nClasses; ++i)
{
classWeights[i] = 0;
}

std::cout << "here debug14" << std::endl;
for (size_t i = 0; i < heapSize; ++i)
{
const_cast<NumericTable *>(labels)->getBlockOfColumnValues(0, heap[i].index, 1, readOnly, labelBD);
classes[i] = *(labelBD.getBlockPtr());
const_cast<NumericTable *>(labels)->releaseBlockOfColumnValues(labelBD);
}

std::cout << "here debug15" << std::endl;
if (voteWeights == voteUniform)
{
for (size_t i = 0; i < heapSize; ++i)
Expand All @@ -519,11 +520,11 @@ services::Status KNNClassificationPredictKernel<algorithmFpType, defaultDense, c
else
{
DAAL_ASSERT(voteWeights == voteDistance);

std::cout << "here debug17" << std::endl;
const algorithmFpType epsilon = daal::services::internal::EpsilonVal<algorithmFpType>::get();

bool isContainZero = false;

std::cout << "here debug18" << std::endl;
for (size_t i = 0; i < heapSize; ++i)
{
if (heap[i].distance <= epsilon)
Expand All @@ -532,7 +533,7 @@ services::Status KNNClassificationPredictKernel<algorithmFpType, defaultDense, c
break;
}
}

std::cout << "here debug19" << std::endl;
if (isContainZero)
{
for (size_t i = 0; i < heapSize; ++i)
Expand All @@ -551,7 +552,7 @@ services::Status KNNClassificationPredictKernel<algorithmFpType, defaultDense, c
}
}
}

std::cout << "here debug21" << std::endl;
algorithmFpType maxWeightClass = 0;
algorithmFpType maxWeight = 0;
for (size_t i = 0; i < nClasses; ++i)
Expand All @@ -562,10 +563,13 @@ services::Status KNNClassificationPredictKernel<algorithmFpType, defaultDense, c
maxWeightClass = i;
}
}
std::cout << "here debug22" << std::endl;
*predictedClass = maxWeightClass;

std::cout << "here debug23" << std::endl;
service_free<algorithmFpType, cpu>(classes);
std::cout << "here debug24" << std::endl;
service_free<algorithmFpType, cpu>(classWeights);
std::cout << "here debug25" << std::endl;
classes = nullptr;
}

Expand Down
Loading
Loading