Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions cpp/daal/src/algorithms/cholesky/cholesky_impl.i
Original file line number Diff line number Diff line change
Expand Up @@ -151,13 +151,13 @@ bool CholeskyKernel<algorithmFPType, method, cpu>::copyToFullMatrix(NumericTable

for (size_t i = iBlock * blockSize; i < endBlock; i++)
{
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t j = 0; j <= i; j++)
{
pL[i * dim + j] = pA[i * dim + j];
}
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t j = (i + 1); j < dim; j++)
{
Expand All @@ -176,13 +176,13 @@ bool CholeskyKernel<algorithmFPType, method, cpu>::copyToFullMatrix(NumericTable
{
const size_t ind = (i + 1) * i / 2;

- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t j = 0; j <= i; j++)
{
pL[i * dim + j] = pA[ind + j];
}
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t j = (i + 1); j < dim; j++)
{
Expand All @@ -201,13 +201,13 @@ bool CholeskyKernel<algorithmFPType, method, cpu>::copyToFullMatrix(NumericTable
{
const size_t ind = (2 * dim - j + 1) * j / 2;

- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t i = 0; i < j; i++)
{
pL[i * dim + j] = algorithmFPType(0);
}
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t i = j; i < dim; i++)
{
Expand Down Expand Up @@ -247,7 +247,7 @@ services::Status CholeskyKernel<algorithmFPType, method, cpu>::copyToLowerTriang
{
const size_t ind = (i + 1) * i / 2;

- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t j = 0; j <= i; j++)
{
Expand Down Expand Up @@ -276,7 +276,7 @@ services::Status CholeskyKernel<algorithmFPType, method, cpu>::copyToLowerTriang
{
const size_t ind = (j + 1) * j / 2;

- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t i = 0; i <= j; i++)
{
Expand Down
12 changes: 6 additions & 6 deletions cpp/daal/src/algorithms/covariance/covariance_impl.i
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ public:
/* Sum input array elements in case of non-normalized data */
for (DAAL_INT i = 0; i < nRows; i++)
{
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (DAAL_INT j = 0; j < _nFeatures; j++)
{
Expand Down Expand Up @@ -269,7 +269,7 @@ public:
}

/// It is safe to use aligned loads and stores because the data in TArrayScalableCalloc data structures is aligned
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
PRAGMA_VECTOR_ALIGNED
for (size_t i = 0; i < (_nFeatures * _nFeatures); i++)
Expand All @@ -286,7 +286,7 @@ public:
return;
}
/// It is safe to use aligned loads and stores because the data is aligned
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
PRAGMA_VECTOR_ALIGNED
for (size_t i = 0; i < _nFeatures; i++)
Expand Down Expand Up @@ -407,7 +407,7 @@ services::Status updateDenseCrossProductAndSums(bool isNormalized, size_t nFeatu
}
for (size_t i = 0; i < nFeatures; i++)
{
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t j = 0; j < nFeatures; j++)
{
Expand Down Expand Up @@ -501,7 +501,7 @@ void mergeCrossProductAndSums(size_t nFeatures, const algorithmFPType * partialC
if (nObsValue == 0)
{
daal::threader_for(nFeatures, nFeatures, [=](size_t i) {
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t j = 0; j <= i; j++)
{
Expand All @@ -517,7 +517,7 @@ void mergeCrossProductAndSums(size_t nFeatures, const algorithmFPType * partialC
algorithmFPType invNewNObs = 1.0 / (nObsValue + partialNObsValue);

daal::threader_for(nFeatures, nFeatures, [=](size_t i) {
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t j = 0; j <= i; j++)
{
Expand Down
6 changes: 3 additions & 3 deletions cpp/daal/src/algorithms/dtrees/dtrees_train_data_helper.i
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ int doPartition(SizeType n, const IndexType * aIdx, const ResponseType * aRespon
SizeType iRight = 0;
int iRowSplitVal = -1;

- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (SizeType i = 0; i < n; ++i)
{
Expand Down Expand Up @@ -457,7 +457,7 @@ int doPartitionIdx(SizeType n, const IndexType * aIdx, const IndexType * aIdx2,

if (aIdx2)
{
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (SizeType i = 0; i < n; ++i)
{
Expand All @@ -478,7 +478,7 @@ int doPartitionIdx(SizeType n, const IndexType * aIdx, const IndexType * aIdx2,
}
else
{
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (SizeType i = 0; i < n; ++i)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ DAAL_FORCEINLINE void fillResults(const size_t nClasses, const enum VotingMethod
{
if (votingMethod == VotingMethod::unweighted || probas == nullptr)
{
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t i = 0; i < blockSize; ++i)
{
Expand All @@ -80,7 +80,7 @@ DAAL_FORCEINLINE void fillResults(const size_t nClasses, const enum VotingMethod
{
for (size_t i = 0; i < blockSize; ++i)
{
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t j = 0; j < nClasses; ++j)
{
Expand Down Expand Up @@ -340,7 +340,7 @@ Status PredictClassificationTask<algorithmFPType, cpu>::predictByTrees(const siz
}
else if (_votingMethod == VotingMethod::weighted)
{
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t i = 0; i < _nClasses; ++i)
{
Expand All @@ -357,7 +357,7 @@ Status PredictClassificationTask<algorithmFPType, cpu>::predictByTrees(const siz
sum += resPtr[i];
}

- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t i = 0; i < _nClasses; ++i)
{
Expand Down Expand Up @@ -403,7 +403,7 @@ Status PredictClassificationTask<algorithmFPType, cpu>::predictByTreesWithoutCon
}
else if (_votingMethod == VotingMethod::weighted)
{
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t i = 0; i < _nClasses; ++i)
{
Expand All @@ -420,7 +420,7 @@ Status PredictClassificationTask<algorithmFPType, cpu>::predictByTreesWithoutCon
sum += resPtr[i];
}

- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t i = 0; i < _nClasses; ++i)
{
Expand Down Expand Up @@ -449,7 +449,7 @@ Status PredictClassificationTask<algorithmFPType, cpu>::parallelPredict(const al

SafeStatus safeStat;

- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t i = 0; i < treeSize; ++i)
{
Expand Down Expand Up @@ -769,7 +769,7 @@ DAAL_FORCEINLINE Status PredictClassificationTask<algorithmFPType, cpu>::predict
}
if (probPtr != nullptr)
{
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t j = 0; j < _nClasses; ++j)
{
Expand Down Expand Up @@ -891,7 +891,7 @@ DAAL_FORCEINLINE Status PredictClassificationTask<float, avx512>::predictOneRowB
{
const size_t treeSize = _aTree[iTree + i]->getNumberOfRows();
const DecisionTreeNode * aNode = (const DecisionTreeNode *)(*_aTree[iTree + i]).getArray();
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t j = 0; j < treeSize; ++j)
{
Expand Down Expand Up @@ -1002,7 +1002,7 @@ DAAL_FORCEINLINE Status PredictClassificationTask<float, avx512>::predictOneRowB
}
if (probPtr != nullptr)
{
- PRAGMA_IVDEP
+ PRAGMA_FORCE_SIMD
PRAGMA_VECTOR_ALWAYS
for (size_t j = 0; j < _nClasses; ++j)
{
Expand Down
Loading
Loading