Skip to content

Commit

Permalink
Removal of Eigen's PartialPivLU and replacement with Crout LU decompo…
Browse files Browse the repository at this point in the history
…sition (#964)
  • Loading branch information
Christos Kotsalos authored Oct 24, 2022
1 parent 16a09e1 commit 888f287
Show file tree
Hide file tree
Showing 18 changed files with 442 additions and 186 deletions.
2 changes: 1 addition & 1 deletion .clang-format.changes
Original file line number Diff line number Diff line change
@@ -1 +1 @@
StatementMacros: [InstantiatePartialPivLu, PartialPivLuInstantiations, nrn_pragma_acc, nrn_pragma_omp]
StatementMacros: [nrn_pragma_acc, nrn_pragma_omp]
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
url = https://github.com/fmtlib/fmt.git
[submodule "ext/eigen"]
path = ext/eigen
url = https://github.com/BlueBrain/eigen.git
url = https://gitlab.com/libeigen/eigen.git
[submodule "ext/json"]
path = ext/json
url = https://github.com/nlohmann/json.git
Expand Down
1 change: 1 addition & 0 deletions .sanitizers/undefined.supp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
implicit-integer-sign-change:double vector[2] Eigen::internal::pabs<double vector[2]>(double vector[2] const&)
unsigned-integer-overflow:nmodl::fast_math::vexp(double)
unsigned-integer-overflow:nmodl::fast_math::vexpm1(double)
unsigned-integer-overflow:std::mersenne_twister_engine
4 changes: 4 additions & 0 deletions cmake/CompilerHelper.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "PGI" OR CMAKE_CXX_COMPILER_ID MATCHES "NVHPC")
# CMake adds standard complaint PGI flag "-A" which breaks compilation of of spdlog and fmt
set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14)

# Avoid errors related to "excessive recursion at instantiation of function ...", Eigen-related
# (in accelerated regions), e.g., transposeInPlace()
list(APPEND NMODL_EXTRA_CXX_FLAGS "-Wc,--pending_instantiations=0")

# ~~~
# PGI enables number of diagnostic messages by default classes which results into thousands of
# messages specifically for AST. Disable these verbose warnings for now.
Expand Down
25 changes: 2 additions & 23 deletions docs/notebooks/nmodl-linear-solver.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,34 +12,13 @@
"\n",
"If the system is sufficiently small (by default $N\\leq3$), then Gaussian elimination is used to directly construct the solution at compile time using SymPy to do the symbolic Gaussian elimination. Optionally Common Subexpression Elimination (CSE) can also be performed.\n",
"\n",
"For larger matrices it may not be numerically safe to solve them at compile time by Gaussian elimination, so instead the matrix equation is constructed and then solved at run time by LU factorization with partial pivoting, using the `PartialPivLU()` function from the Eigen header-only matrix algebra library."
"For larger matrices it may not be numerically safe to solve them at compile time by Gaussian elimination, so instead the matrix equation is constructed and then solved at run time by LU factorization with partial pivoting (for more, see Crout solver in `src/solver/crout` and `test/unit/crout`)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
"name": "python"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion ext/eigen
Submodule eigen updated from 7e8a37 to 314739
38 changes: 17 additions & 21 deletions src/codegen/codegen_acc_visitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ namespace codegen {
* for(int id=0; id<nodecount; id++) {
*
*/
void CodegenAccVisitor::print_channel_iteration_block_parallel_hint(BlockType type) {
void CodegenAccVisitor::print_channel_iteration_block_parallel_hint(BlockType type,
bool error_checking) {
if (info.artificial_cell) {
return;
}
Expand All @@ -47,9 +48,21 @@ void CodegenAccVisitor::print_channel_iteration_block_parallel_hint(BlockType ty
}
}
present_clause << ')';
printer->fmt_line("nrn_pragma_acc(parallel loop {} async(nt->stream_id) if(nt->compute_gpu))",
present_clause.str());
printer->add_line("nrn_pragma_omp(target teams distribute parallel for if(nt->compute_gpu))");
if (error_checking) {
printer->fmt_line(
"nrn_pragma_acc(parallel loop {} reduction(+:solver_error) async(nt->stream_id) "
"if(nt->compute_gpu))",
present_clause.str());
printer->add_line(
"nrn_pragma_omp(target teams distribute parallel for reduction(+:solver_error) "
"if(nt->compute_gpu))");
} else {
printer->fmt_line(
"nrn_pragma_acc(parallel loop {} async(nt->stream_id) if(nt->compute_gpu))",
present_clause.str());
printer->add_line(
"nrn_pragma_omp(target teams distribute parallel for if(nt->compute_gpu))");
}
}


Expand All @@ -71,15 +84,6 @@ void CodegenAccVisitor::print_backend_includes() {
printer->add_line("#include <coreneuron/utils/offload.hpp>");
printer->add_line("#include <cuda_runtime_api.h>");
}

if (info.eigen_linear_solver_exist && std::accumulate(info.state_vars.begin(),
info.state_vars.end(),
0,
[](int l, const SymbolType& variable) {
return l += variable->get_length();
}) > 4) {
printer->add_line("#include <partial_piv_lu/partial_piv_lu.h>");
}
}


Expand Down Expand Up @@ -131,14 +135,6 @@ void CodegenAccVisitor::print_net_send_buffering_grow() {
// can not grow buffer during gpu execution
}

void CodegenAccVisitor::print_eigen_linear_solver(const std::string& /* float_type */, int N) {
if (N <= 4) {
printer->add_line("nmodl_eigen_xm = nmodl_eigen_jm.inverse()*nmodl_eigen_fm;");
} else {
printer->fmt_line("nmodl_eigen_xm = partialPivLu<{}>nmodl_eigen_jm, nmodl_eigen_fm);", N);
}
}

/**
* Each kernel like nrn_init, nrn_state and nrn_cur could be offloaded
* to accelerator. In this case, at very top level, we print pragma
Expand Down
6 changes: 2 additions & 4 deletions src/codegen/codegen_acc_visitor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ class CodegenAccVisitor: public CodegenCVisitor {


/// ivdep like annotation for channel iterations
void print_channel_iteration_block_parallel_hint(BlockType type) override;
void print_channel_iteration_block_parallel_hint(BlockType type,
bool error_checking = false) override;


/// atomic update pragma for reduction statements
Expand Down Expand Up @@ -128,9 +129,6 @@ class CodegenAccVisitor: public CodegenCVisitor {
void print_net_send_buffering_grow() override;


void print_eigen_linear_solver(const std::string& float_type, int N) override;


public:
CodegenAccVisitor(const std::string& mod_file,
const std::string& output_dir,
Expand Down
71 changes: 63 additions & 8 deletions src/codegen/codegen_c_visitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1108,7 +1108,8 @@ void CodegenCVisitor::print_net_init_acc_serial_annotation_block_end() {
* for(int id = 0; id < nodecount; id++) {
* \endcode
*/
void CodegenCVisitor::print_channel_iteration_block_parallel_hint(BlockType /* type */) {
void CodegenCVisitor::print_channel_iteration_block_parallel_hint(BlockType /* type */,
bool /* error_checking */) {
printer->add_line("#pragma ivdep");
printer->add_line("#pragma omp simd");
}
Expand Down Expand Up @@ -1853,6 +1854,7 @@ void CodegenCVisitor::visit_eigen_newton_solver_block(const ast::EigenNewtonSolv
printer->add_line("newton_functor.initialize();");
printer->add_line(
"int newton_iterations = nmodl::newton::newton_solver(nmodl_eigen_xm, newton_functor);");
printer->add_line("if (newton_iterations < 0) solver_error += 1;");

// assign newton solver results in matrix X to state vars
print_statement_block(*node.get_update_states_block(), false, false);
Expand All @@ -1866,6 +1868,10 @@ void CodegenCVisitor::visit_eigen_linear_solver_block(const ast::EigenLinearSolv
int N = node.get_n_state_vars()->get_value();
printer->fmt_line("Eigen::Matrix<{0}, {1}, 1> nmodl_eigen_xm, nmodl_eigen_fm;", float_type, N);
printer->fmt_line("Eigen::Matrix<{0}, {1}, {1}> nmodl_eigen_jm;", float_type, N);
if (N <= 4) {
printer->add_line("bool invertible;");
printer->fmt_line("Eigen::Matrix<{0}, {1}, {1}> nmodl_eigen_jm_inv;", float_type, N);
}
printer->fmt_line("{}* nmodl_eigen_x = nmodl_eigen_xm.data();", float_type);
printer->fmt_line("{}* nmodl_eigen_j = nmodl_eigen_jm.data();", float_type);
printer->fmt_line("{}* nmodl_eigen_f = nmodl_eigen_fm.data();", float_type);
Expand All @@ -1884,11 +1890,30 @@ void CodegenCVisitor::visit_eigen_linear_solver_block(const ast::EigenLinearSolv
void CodegenCVisitor::print_eigen_linear_solver(const std::string& float_type, int N) {
if (N <= 4) {
// Faster compared to LU, given the template specialization in Eigen.
printer->add_line("nmodl_eigen_xm = nmodl_eigen_jm.inverse()*nmodl_eigen_fm;");
printer->add_line("nmodl_eigen_jm.computeInverseWithCheck(nmodl_eigen_jm_inv,invertible);");
printer->add_line("nmodl_eigen_xm = nmodl_eigen_jm_inv*nmodl_eigen_fm;");
printer->add_line("if(!invertible) solver_error += 1;");
} else {
// In Eigen the default storage order is ColMajor.
// Crout's implementation requires matrices stored in RowMajor order (C-style arrays).
// Therefore, the transposeInPlace is critical such that the data() method to give the rows
// instead of the columns.
printer->add_line("if (!nmodl_eigen_jm.IsRowMajor) nmodl_eigen_jm.transposeInPlace();");

// pivot vector
printer->fmt_line("Eigen::Matrix<int, {}, 1> pivot;", N);

// In-place LU-Decomposition (Crout Algo) : Jm is replaced by its LU-decomposition
printer->fmt_line(
"if (nmodl::crout::Crout<{0}>({1}, nmodl_eigen_jm.data(), pivot.data()) < 0) "
"solver_error += 1;",
float_type,
N);

// Solve the linear system : Forward/Backward substitution part
printer->fmt_line(
"nmodl_eigen_xm = Eigen::PartialPivLU<Eigen::Ref<Eigen::Matrix<{0}, {1}, "
"{1}>>>(nmodl_eigen_jm).solve(nmodl_eigen_fm);",
"nmodl::crout::solveCrout<{0}>({1}, nmodl_eigen_jm.data(), nmodl_eigen_fm.data(), "
"nmodl_eigen_xm.data(), pivot.data());",
float_type,
N);
}
Expand Down Expand Up @@ -2465,7 +2490,17 @@ void CodegenCVisitor::print_coreneuron_includes() {
printer->add_line("#include <newton/newton.hpp>");
}
if (info.eigen_linear_solver_exist) {
printer->add_line("#include <Eigen/LU>");
if (std::accumulate(info.state_vars.begin(),
info.state_vars.end(),
0,
[](int l, const SymbolType& variable) {
return l += variable->get_length();
}) > 4) {
printer->add_line("#include <crout/crout.hpp>");
} else {
printer->add_line("#include <Eigen/Dense>");
printer->add_line("#include <Eigen/LU>");
}
}
}

Expand Down Expand Up @@ -3424,7 +3459,12 @@ void CodegenCVisitor::print_nrn_init(bool skip_init_check) {
print_dt_update_to_device();
}

print_channel_iteration_block_parallel_hint(BlockType::Initial);
if (info.eigen_newton_solver_exist) {
printer->add_line("int solver_error = 0;");
print_channel_iteration_block_parallel_hint(BlockType::Initial, true);
} else {
print_channel_iteration_block_parallel_hint(BlockType::Initial);
}
printer->start_block("for (int id = 0; id < nodecount; id++)");

if (info.net_receive_node != nullptr) {
Expand All @@ -3435,6 +3475,10 @@ void CodegenCVisitor::print_nrn_init(bool skip_init_check) {
printer->end_block(1);
print_shadow_reduction_statements();

if (info.eigen_newton_solver_exist)
printer->add_line(
"if (solver_error > 0) throw std::runtime_error(\"Newton solver did not converge!\");");

if (!info.changed_dt.empty()) {
printer->fmt_line("{} = _save_prev_dt;", get_variable_name(naming::NTHREAD_DT_VARIABLE));
print_dt_update_to_device();
Expand Down Expand Up @@ -4253,9 +4297,14 @@ void CodegenCVisitor::print_nrn_state() {
printer->add_newline(2);
printer->add_line("/** update state */");
print_global_function_common_code(BlockType::State);
print_channel_iteration_block_parallel_hint(BlockType::State);
if (info.eigen_newton_solver_exist || info.eigen_linear_solver_exist) {
printer->add_line("int solver_error = 0;");
print_channel_iteration_block_parallel_hint(BlockType::State, true);
} else {
print_channel_iteration_block_parallel_hint(BlockType::State);
}
printer->start_block("for (int id = 0; id < nodecount; id++)");

printer->add_line("int node_id = node_index[id];");
printer->add_line("double v = voltage[node_id];");
print_v_unused();
Expand Down Expand Up @@ -4295,6 +4344,12 @@ void CodegenCVisitor::print_nrn_state() {
}

print_kernel_data_present_annotation_block_end();

if (info.eigen_newton_solver_exist)
printer->add_line("if (solver_error > 0) throw std::runtime_error(\"Newton solver did not converge!\");");
if (info.eigen_linear_solver_exist)
printer->add_line("if (solver_error > 0) throw std::runtime_error(\"Singular matrices (Crout/Inverse)!\");");

printer->end_block(1);
codegen = false;
}
Expand Down
3 changes: 2 additions & 1 deletion src/codegen/codegen_c_visitor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1263,7 +1263,8 @@ class CodegenCVisitor: public visitor::ConstAstVisitor {
*
* \param type The block type
*/
virtual void print_channel_iteration_block_parallel_hint(BlockType type);
virtual void print_channel_iteration_block_parallel_hint(BlockType type,
bool error_checking = false);


/**
Expand Down
1 change: 1 addition & 0 deletions src/nmodl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#endif

#include "crout/crout.hpp"
#include "newton/newton.hpp"
7 changes: 4 additions & 3 deletions src/solver/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@
# Solver sources
# =============================================================================
set(NEWTON_SOLVER_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/newton/newton.hpp)
set(CROUT_SOLVER_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/crout/crout.hpp)

# =============================================================================
# Copy necessary files to build directory
# =============================================================================
# Newton
file(GLOB NMODL_NEWTON_SOLVER_HEADER_FILES "${CMAKE_CURRENT_SOURCE_DIR}/newton/*.h*")
file(COPY ${NMODL_NEWTON_SOLVER_HEADER_FILES} DESTINATION ${CMAKE_BINARY_DIR}/include/newton/)
# partial_piv_lu
file(GLOB NMODL_PARTIAL_PIV_LU_API_FILES "${CMAKE_CURRENT_SOURCE_DIR}/partial_piv_lu/*")
file(COPY ${NMODL_PARTIAL_PIV_LU_API_FILES} DESTINATION ${CMAKE_BINARY_DIR}/include/partial_piv_lu/)
# Crout
file(GLOB NMODL_CROUT_SOLVER_HEADER_FILES "${CMAKE_CURRENT_SOURCE_DIR}/crout/*.h*")
file(COPY ${NMODL_CROUT_SOLVER_HEADER_FILES} DESTINATION ${CMAKE_BINARY_DIR}/include/crout/)
# Eigen
file(COPY ${NMODL_PROJECT_SOURCE_DIR}/ext/eigen/Eigen DESTINATION ${CMAKE_BINARY_DIR}/include/)

Expand Down
Loading

0 comments on commit 888f287

Please sign in to comment.