From 95f55079d0c836336bf475eb6added777b966eb4 Mon Sep 17 00:00:00 2001 From: "Chance.H" Date: Mon, 23 Sep 2024 13:22:10 +0800 Subject: [PATCH] to: use cuda Squashed commit of the following: commit d715c1022a94c154263c4ec02382dbe5134e9c2d Author: Chance.H Date: Mon Sep 23 13:13:58 2024 +0800 to: use cuda commit 9a90661bbaf615eedd9f16f0b6deca5b0d0c9759 Author: Chance.H Date: Mon Sep 23 10:56:48 2024 +0800 to: init cuda set commit 8f6366a25ab5a5bae10405dd18c9d9fe72151bc4 Author: Chance.H Date: Mon Sep 23 10:43:44 2024 +0800 to: fix scene --- test/system_test/MassSpring3D/CMakeLists.txt | 6 +- .../MassSpring3D/ClothSimulation.cpp | 52 +++-- .../MassSpring3D/dtkPhysMassSpringSolver.cpp | 210 +++++++++++++----- .../MassSpring3D/include/ClothSimulation.h | 35 +-- test/system_test/MassSpring3D/include/Scene.h | 4 +- .../include/dtkPhysMassSpringSolver.h | 11 +- .../system_test/MassSpring3D/include/step.cuh | 9 + test/system_test/MassSpring3D/main.cpp | 53 +++-- test/system_test/MassSpring3D/step.cu | 62 ++++++ 9 files changed, 324 insertions(+), 118 deletions(-) create mode 100644 test/system_test/MassSpring3D/include/step.cuh create mode 100644 test/system_test/MassSpring3D/step.cu diff --git a/test/system_test/MassSpring3D/CMakeLists.txt b/test/system_test/MassSpring3D/CMakeLists.txt index bd253d3..9d10299 100644 --- a/test/system_test/MassSpring3D/CMakeLists.txt +++ b/test/system_test/MassSpring3D/CMakeLists.txt @@ -11,13 +11,17 @@ add_executable(${PROJECT_NAME} Shader.cpp Renderer.cpp dtkPhysMassSpringSolver.cpp + step.cu ) target_include_directories(${PROJECT_NAME} PRIVATE include ) -target_link_libraries(${PROJECT_NAME} GLEW::GLEW) +# find_package(CUDA REQUIRED) +find_package(CUDAToolkit) + +target_link_libraries(${PROJECT_NAME} GLEW::GLEW CUDA::cudart CUDA::cusparse) if (APPLE) target_compile_definitions(${PROJECT_NAME} PRIVATE diff --git a/test/system_test/MassSpring3D/ClothSimulation.cpp b/test/system_test/MassSpring3D/ClothSimulation.cpp index 0eb417e..0721a0f 100644 --- a/test/system_test/MassSpring3D/ClothSimulation.cpp +++ b/test/system_test/MassSpring3D/ClothSimulation.cpp @@ -14,7 +14,8 @@ */ #include "ClothSimulation.h" -ClothSimulation::ClothSimulation(const unsigned int& windowWidth, const unsigned int& windowHeight, const dtk::dtkDouble3& gravity) : Scene(windowWidth, windowHeight), _gravity(gravity) { +ClothSimulation::ClothSimulation(const unsigned int& windowWidth, const unsigned int& windowHeight, const dtk::dtkDouble3& gravity, const unsigned int& edge_num) : Scene(windowWidth, windowHeight), _gravity(gravity), _param(SystemParam(edge_num)) { + this->Init(); }; const ClothSimulation::ClothMesh ClothSimulation::GetClothMesh() const { @@ -90,7 +91,7 @@ void ClothSimulation::InitShader() { } void ClothSimulation::InitCloth() { - _cloth_mesh = dtkFactory::CreateClothMesh(SystemParam::w, SystemParam::n); + _cloth_mesh = dtkFactory::CreateClothMesh(_param.w, _param.n); ClothDrop(); @@ -107,7 +108,7 @@ void ClothSimulation::InitScene() { glm::vec3(0.618, -0.786, 0.3f) * g_camera_distance, glm::vec3(0.0f, 0.0f, -1.0f), glm::vec3(0.0f, 0.0f, 1.0f) - ) * glm::translate(glm::mat4(1), glm::vec3(0.0f, 0.0f, SystemParam::w / 4)); + ) * glm::translate(glm::mat4(1), glm::vec3(0.0f, 0.0f, _param.w / 4)); g_ProjectionMatrix = glm::perspective(PI / 4.0f, g_windowWidth * 1.0f / g_windowHeight, 0.01f, 1000.0f); }; @@ -116,7 +117,7 @@ void ClothSimulation::SetParameters() { }; void ClothSimulation::ClothDrop() { - _system = dtkFactory::CreateClothMassSpringSystem(_cloth_mesh); + _system = dtkFactory::CreateClothMassSpringSystem(_cloth_mesh, _param); // _sphere_mesh = dtkFactory::CreateSphereMesh(dtk::dtkDouble3(0, 0, -1), 0.64, 20); _solver = dtkFactory::CreateClothMassSpringSolver(_system); @@ -161,6 +162,9 @@ void ClothSimulation::UpdateRenderTarget() { }; dtk::dtkStaticTriangleMesh::Ptr dtkFactory::CreateClothMesh(float w, int n) { + if (n % 2 == 0) + throw std::runtime_error("n must be odd!"); + dtk::dtkStaticTriangleMesh::Ptr result = dtk::dtkStaticTriangleMesh::New(); unsigned int idx = 0; // vertex index @@ -245,26 +249,26 @@ dtk::dtkStaticTriangleMesh::Ptr dtkFactory::CreateSphereMesh(dtk::dtkDouble3 cen return result; } -dtk::dtkPhysMassSpring::Ptr dtkFactory::CreateClothMassSpringSystem(const dtk::dtkStaticTriangleMesh::Ptr& mesh) { - dtk::dtkDouble3 gravity(0, 0, -SystemParam::g); - dtk::dtkPhysMassSpring::Ptr system = dtk::dtkPhysMassSpring::New(SystemParam::m, SystemParam::k, SystemParam::b, SystemParam::a, SystemParam::r, SystemParam::h, gravity); +dtk::dtkPhysMassSpring::Ptr dtkFactory::CreateClothMassSpringSystem(const dtk::dtkStaticTriangleMesh::Ptr& mesh, const SystemParam& _param) { + dtk::dtkDouble3 gravity(0, 0, -_param.g); + dtk::dtkPhysMassSpring::Ptr system = dtk::dtkPhysMassSpring::New(_param.m, _param.k, _param.b, _param.a, _param.r, _param.h, gravity); // system->SetTriangleMesh(mesh); // n must be odd - assert(SystemParam::n % 2 == 1); + assert(_param.n % 2 == 1); // compute n_points and n_springs - unsigned int n_points = SystemParam::n * SystemParam::n; - unsigned int n_springs = (SystemParam::n - 1) * (5 * SystemParam::n - 2); + unsigned int n_points = _param.n * _param.n; + unsigned int n_springs = (_param.n - 1) * (5 * _param.n - 2); system->SetPoints(mesh->GetPoints()); for (unsigned int id = 0; id < n_points; id++) { - system->AddMassPoint(id, SystemParam::m, dtk::dtkT3(0, 0, 0), SystemParam::a, SystemParam::c, gravity); + system->AddMassPoint(id, _param.m, dtk::dtkT3(0, 0, 0), _param.a, _param.c, gravity); } - unsigned int n = SystemParam::n; + unsigned int n = _param.n; double rest_length_factor = 1.05; - for (unsigned int i = 0; i < SystemParam::n; i++) { - for (unsigned int j = 0; j < SystemParam::n; j++) { + for (unsigned int i = 0; i < _param.n; i++) { + for (unsigned int j = 0; j < _param.n; j++) { // bottom right corner if (i == n - 1 && j == n - 1) { continue; @@ -272,11 +276,11 @@ dtk::dtkPhysMassSpring::Ptr dtkFactory::CreateClothMassSpringSystem(const dtk::d if (i == n - 1) { // structural spring - system->AddSpring(n * i + j, n * i + j + 1, SystemParam::k, SystemParam::a, rest_length_factor); + system->AddSpring(n * i + j, n * i + j + 1, _param.k, _param.a, rest_length_factor); // bending spring if (j % 2 == 0) { - system->AddSpring(n * i + j, n * i + j + 2, SystemParam::k, SystemParam::a, rest_length_factor); + system->AddSpring(n * i + j, n * i + j + 2, _param.k, _param.a, rest_length_factor); } continue; } @@ -284,31 +288,31 @@ dtk::dtkPhysMassSpring::Ptr dtkFactory::CreateClothMassSpringSystem(const dtk::d // right edge if (j == n - 1) { // structural spring - system->AddSpring(n * i + j, n * (i + 1) + j, SystemParam::k, SystemParam::a, rest_length_factor); + system->AddSpring(n * i + j, n * (i + 1) + j, _param.k, _param.a, rest_length_factor); // bending spring if (i % 2 == 0) { - system->AddSpring(n * i + j, n * (i + 2) + j, SystemParam::k, SystemParam::a, rest_length_factor); + system->AddSpring(n * i + j, n * (i + 2) + j, _param.k, _param.a, rest_length_factor); } continue; } // structural springs - system->AddSpring(n * i + j, n * i + j + 1, SystemParam::k, SystemParam::a, rest_length_factor); + system->AddSpring(n * i + j, n * i + j + 1, _param.k, _param.a, rest_length_factor); - system->AddSpring(n * i + j, n * (i + 1) + j, SystemParam::k, SystemParam::a, rest_length_factor); + system->AddSpring(n * i + j, n * (i + 1) + j, _param.k, _param.a, rest_length_factor); // shearing springs - system->AddSpring(n * i + j, n * (i + 1) + j + 1, SystemParam::k, SystemParam::a, rest_length_factor); + system->AddSpring(n * i + j, n * (i + 1) + j + 1, _param.k, _param.a, rest_length_factor); - system->AddSpring(n * (i + 1) + j, n * i + j + 1, SystemParam::k, SystemParam::a, rest_length_factor); + system->AddSpring(n * (i + 1) + j, n * i + j + 1, _param.k, _param.a, rest_length_factor); // bending springs if (j % 2 == 0) { - system->AddSpring(n * i + j, n * i + j + 2, SystemParam::k, SystemParam::a, rest_length_factor); + system->AddSpring(n * i + j, n * i + j + 2, _param.k, _param.a, rest_length_factor); } if (i % 2 == 0) { - system->AddSpring(n * i + j, n * (i + 2) + j, SystemParam::k, SystemParam::a, rest_length_factor); + system->AddSpring(n * i + j, n * (i + 2) + j, _param.k, _param.a, rest_length_factor); } } } diff --git a/test/system_test/MassSpring3D/dtkPhysMassSpringSolver.cpp b/test/system_test/MassSpring3D/dtkPhysMassSpringSolver.cpp index 6b488d6..b7be9a5 100644 --- a/test/system_test/MassSpring3D/dtkPhysMassSpringSolver.cpp +++ b/test/system_test/MassSpring3D/dtkPhysMassSpringSolver.cpp @@ -1,4 +1,5 @@ #include "dtkPhysMassSpringSolver.h" +#include "step.cuh" dtk::dtkPhysMassSpringSolver::dtkPhysMassSpringSolver() { } @@ -69,6 +70,18 @@ dtk::dtkPhysMassSpringSolver::dtkPhysMassSpringSolver(const dtk::dtkPhysMassSpri _system_matrix.compute(A); + int num_springs = _system->GetNumberOfSprings(); + _rest_lengths.resize(num_springs); + _spring_indices.resize(2 * num_springs); + + // 初始化弹簧数据 + for (int i = 0; i < num_springs; ++i) { + dtk::dtkPhysSpring* spring = _system->GetSpring(i); + _rest_lengths[i] = spring->GetRestLength(); + _spring_indices[2 * i] = spring->GetFirstVertex()->GetPointID(); + _spring_indices[2 * i + 1] = spring->GetSecondVertex()->GetPointID(); + } + // std::cout << "M: " << std::endl; // printSparseMatrix(_M); // std::cout << "L: " << std::endl; @@ -86,68 +99,159 @@ void dtk::dtkPhysMassSpringSolver::solve(unsigned int iter_num) { _inertial_term = _M * ((damping_factor + 1) * (_current_state)-damping_factor * _prev_state); _prev_state = _current_state; - // std::cout << "damping_factor: " << damping_factor << std::endl; - // std::cout << "M: " << std::endl; - // printSparseMatrix(_M); - // std::cout << "_current_state: " << std::endl << _current_state << std::endl; - // std::cout << "_prev_state: " << std::endl << _prev_state << std::endl; - // std::cout << "_internal_term: " << std::endl << _inertial_term << std::endl; - // perform steps + bool use_cuda = true; for (unsigned int i = 0; i < iter_num; i++) { - localStep(); - globalStep(); + step(use_cuda); } - - // std::cout << "_current_state: " << std::endl << _current_state << std::endl; } -void dtk::dtkPhysMassSpringSolver::localStep() { - for (dtk::dtkID id = 0;id < _system->GetNumberOfSprings();id++) { - dtk::dtkPhysSpring* spring = _system->GetSpring(id); - dtk::dtkID id1 = spring->GetFirstVertex()->GetPointID(); - dtk::dtkID id2 = spring->GetSecondVertex()->GetPointID(); - double rest_length = spring->GetRestLength(); - Vector3f p12( - _current_state[3 * id1 + 0] - _current_state[3 * id2 + 0], - _current_state[3 * id1 + 1] - _current_state[3 * id2 + 1], - _current_state[3 * id1 + 2] - _current_state[3 * id2 + 2] - ); - - p12.normalize(); - _spring_directions[3 * id + 0] = rest_length * p12[0]; - _spring_directions[3 * id + 1] = rest_length * p12[1]; - _spring_directions[3 * id + 2] = rest_length * p12[2]; +extern "C" void run_local_step_kernel(const float* current_state, float* spring_directions, const float* rest_lengths, const int* spring_indices, int num_springs); + +void dtk::dtkPhysMassSpringSolver::step(bool use_cuda) { + if (use_cuda) { + // ***** CUDA Setup ***** // + cusparseHandle_t handle; + cusparseCreate(&handle); + + int num_springs = _system->GetNumberOfSprings(); + int num_points = _system->GetNumberOfMassPoints(); + float h2 = _system->GetTimeStep() * _system->GetTimeStep(); + + // 分配 GPU 内存 + float* d_current_state, * d_spring_directions, * d_inertial_term, * d_fext_force, * d_b; + float* d_rest_lengths; + int* d_spring_indices; + + cudaMalloc(&d_current_state, _current_state.size() * sizeof(float)); + cudaMalloc(&d_spring_directions, _spring_directions.size() * sizeof(float)); + cudaMalloc(&d_inertial_term, _inertial_term.size() * sizeof(float)); + cudaMalloc(&d_fext_force, num_points * 3 * sizeof(float)); // Assuming 3D points + cudaMalloc(&d_b, _inertial_term.size() * sizeof(float)); + cudaMalloc(&d_rest_lengths, num_springs * sizeof(float)); + cudaMalloc(&d_spring_indices, 2 * num_springs * sizeof(int)); // 每个弹簧有两个顶点 + + // 将数据从 CPU 拷贝到 GPU + cudaMemcpy(d_current_state, _current_state.data(), _current_state.size() * sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(d_rest_lengths, _rest_lengths.data(), num_springs * sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(d_spring_indices, _spring_indices.data(), 2 * num_springs * sizeof(int), cudaMemcpyHostToDevice); + + // 调用 CUDA 函数执行 local step + run_local_step_kernel(d_current_state, d_spring_directions, d_rest_lengths, d_spring_indices, num_springs); + + // 拷贝结果回 CPU + cudaMemcpy(_spring_directions.data(), d_spring_directions, _spring_directions.size() * sizeof(float), cudaMemcpyDeviceToHost); + + // ***** 外力计算 ***** // + dtk::dtkDouble3 fext = _system->GetDefaultGravityAccel(); + Vector3f fext_vector(fext.x, fext.y, fext.z); + VectorXf fext_force = fext_vector.replicate(num_points, 1); + cudaMemcpy(d_fext_force, fext_force.data(), num_points * 3 * sizeof(float), cudaMemcpyHostToDevice); + + // ***** Right Hand Side (RHS) 的计算 ***** // + VectorXf rhs = _inertial_term + h2 * _J * _spring_directions + h2 * fext_force; + cudaMemcpy(d_b, rhs.data(), rhs.size() * sizeof(float), cudaMemcpyHostToDevice); + + // ***** cuSPARSE 矩阵求解 ***** // + Eigen::SparseMatrix system_matrix_eigen = _system_matrix.matrixL(); // 获取分解的下三角矩阵 + int nnz = system_matrix_eigen.nonZeros(); + int rows = system_matrix_eigen.rows(); + int cols = system_matrix_eigen.cols(); + + // 生成 CSR 格式的数据 + std::vector csrRowPtr(rows + 1), csrColInd(nnz); + std::vector csrVal(nnz); + int idx = 0; + for (int k = 0; k < system_matrix_eigen.outerSize(); ++k) { + csrRowPtr[k] = idx; + for (Eigen::SparseMatrix::InnerIterator it(system_matrix_eigen, k); it; ++it) { + csrVal[idx] = it.value(); + csrColInd[idx] = it.col(); + idx++; + } + } + csrRowPtr[rows] = nnz; + + // 在 GPU 上分配 CSR 矩阵 + int* d_csrRowPtr, * d_csrColInd; + float* d_csrVal; + cudaMalloc(&d_csrRowPtr, (rows + 1) * sizeof(int)); + cudaMalloc(&d_csrColInd, nnz * sizeof(int)); + cudaMalloc(&d_csrVal, nnz * sizeof(float)); + cudaMemcpy(d_csrRowPtr, csrRowPtr.data(), (rows + 1) * sizeof(int), cudaMemcpyHostToDevice); + cudaMemcpy(d_csrColInd, csrColInd.data(), nnz * sizeof(int), cudaMemcpyHostToDevice); + cudaMemcpy(d_csrVal, csrVal.data(), nnz * sizeof(float), cudaMemcpyHostToDevice); + + // 使用 cuSPARSE 进行求解 + float* d_x; + cudaMalloc(&d_x, rhs.size() * sizeof(float)); + + cusparseMatDescr_t descr; + cusparseCreateMatDescr(&descr); + + // 定义 alpha 和 beta + float alpha = 1.0f; + float beta = 0.0f; + + // 使用 cuSPARSE API 进行矩阵-向量乘法 + cusparseSpMatDescr_t matA; + cusparseDnVecDescr_t vecX, vecY; + cusparseCreateCsr(&matA, rows, cols, nnz, d_csrRowPtr, d_csrColInd, d_csrVal, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); + cusparseCreateDnVec(&vecX, rhs.size(), d_b, CUDA_R_32F); + cusparseCreateDnVec(&vecY, rhs.size(), d_x, CUDA_R_32F); + + cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, vecX, &beta, vecY, + CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, nullptr); + + // 将解拷贝回 CPU + cudaMemcpy(_current_state.data(), d_x, rhs.size() * sizeof(float), cudaMemcpyDeviceToHost); + + // 释放 GPU 资源 + cudaFree(d_current_state); + cudaFree(d_spring_directions); + cudaFree(d_inertial_term); + cudaFree(d_fext_force); + cudaFree(d_b); + cudaFree(d_x); + cudaFree(d_csrRowPtr); + cudaFree(d_csrColInd); + cudaFree(d_csrVal); + cudaFree(d_rest_lengths); + cudaFree(d_spring_indices); + cusparseDestroyMatDescr(descr); + cusparseDestroy(handle); } - // std::cout << "_spring_directions: " << std::endl << _spring_directions << std::endl; -} + else { + // 原有 CPU 代码 + for (dtk::dtkID id = 0; id < _system->GetNumberOfSprings(); id++) { + dtk::dtkPhysSpring* spring = _system->GetSpring(id); + dtk::dtkID id1 = spring->GetFirstVertex()->GetPointID(); + dtk::dtkID id2 = spring->GetSecondVertex()->GetPointID(); + double rest_length = spring->GetRestLength(); + Vector3f p12( + _current_state[3 * id1 + 0] - _current_state[3 * id2 + 0], + _current_state[3 * id1 + 1] - _current_state[3 * id2 + 1], + _current_state[3 * id1 + 2] - _current_state[3 * id2 + 2] + ); -void dtk::dtkPhysMassSpringSolver::globalStep() { - float h2 = _system->GetTimeStep() * _system->GetTimeStep(); // shorthand - - // compute right hand side - // dtk::dtkDouble3 fext(0, 0, 0); - dtk::dtkDouble3 fext = _system->GetDefaultGravityAccel(); // TODO: change to _system->GetImpulseForce() - VectorXf fext_force = VectorXf(Vector3f(fext.x, fext.y, fext.z).replicate(_system->GetNumberOfMassPoints(), 1)); - - // // 生成随机外力 - // std::random_device rd; - // std::mt19937 gen(rd()); - // std::uniform_real_distribution<> dis(-1.0, 1.0); - // dtk::dtkDouble3 random_force(dis(gen), dis(gen), dis(gen)); - // // 随机选择一个质点 - // std::uniform_int_distribution<> point_dis(0, _system->GetNumberOfMassPoints() - 1); - // int random_point = point_dis(gen); - // fext_force.segment<3>(random_point * 3) += Vector3f(random_force.x, random_force.y, random_force.z); - - VectorXf b = _inertial_term + h2 * _J * _spring_directions + h2 * fext_force; - // std::cout << "b: " << std::endl << b << std::endl; - - // solve system and update state - _current_state = _system_matrix.solve(b); - // std::cout << "_current_state: " << std::endl << _current_state << std::endl; + p12.normalize(); + _spring_directions[3 * id + 0] = rest_length * p12[0]; + _spring_directions[3 * id + 1] = rest_length * p12[1]; + _spring_directions[3 * id + 2] = rest_length * p12[2]; + } + + float h2 = _system->GetTimeStep() * _system->GetTimeStep(); + dtk::dtkDouble3 fext = _system->GetDefaultGravityAccel(); + VectorXf fext_force = VectorXf(Vector3f(fext.x, fext.y, fext.z).replicate(_system->GetNumberOfMassPoints(), 1)); + + VectorXf b = _inertial_term + h2 * _J * _spring_directions + h2 * fext_force; + _current_state = _system_matrix.solve(b); + } } + void dtk::dtkPhysMassSpringSolver::satisfy(ClothDropType type) { if (type == Sphere) { const float radius = 0.64f; diff --git a/test/system_test/MassSpring3D/include/ClothSimulation.h b/test/system_test/MassSpring3D/include/ClothSimulation.h index 17b2c1d..3f3c793 100644 --- a/test/system_test/MassSpring3D/include/ClothSimulation.h +++ b/test/system_test/MassSpring3D/include/ClothSimulation.h @@ -38,23 +38,29 @@ #include "dtkPhysMassSpringSolver.h" #include "dtkPhysMassSpringCollisionResponse.h" -namespace SystemParam { - static const int n = 33; // must be odd, n * n = n_vertices | 61 - static const float w = 2.0f; // width | 2.0f - static const float h = 0.008f; // time step, smaller for better results | 0.008f = 0.016f/2 - static const float r = w / (n - 1) * 1.05f; // spring rest legnth - static const float k = 1.0f; // spring stiffness | 1.0f; - static const float m = 0.05f / (n * n); // point mass | 0.25f - static const float a = 0.993f; // point damping, close to 1.0 | 0.993f - static const float b = 5880.0f; // damping | 5880.0f - static const float c = 2.5f; // point resistence | 2.5f - static const float g = 9.8f * m; // gravitational force | 9.8f -} +class SystemParam { +public: + SystemParam(int n = 33, float w = 2.0f, float h = 0.008f, float k = 1.0f, + float a = 0.993f, float b = 5880.0f, float c = 2.5f) + : n(n), w(w), h(h), r(w / (n - 1) * 1.05f), k(k), + m(0.05f / (n * n)), a(a), b(b), c(c), g(9.8f * m) {} + + const int n; // must be odd, n * n = n_vertices | 33 + const float w; // width | 2.0f + const float h; // time step, smaller for better results | 0.008f = 0.016f/2 + const float r; // spring rest length + const float k; // spring stiffness | 1.0f + const float m; // point mass | 0.25f + const float a; // point damping, close to 1.0 | 0.993f + const float b; // damping | 5880.0f + const float c; // point resistance | 2.5f + const float g; // gravitational force | 9.8f +}; class ClothSimulation : public Scene { typedef Eigen::VectorXf VectorXf; public: - ClothSimulation(const unsigned int& windowWidth, const unsigned int& windowHeight, const dtk::dtkDouble3& gravity); + ClothSimulation(const unsigned int& windowWidth, const unsigned int& windowHeight, const dtk::dtkDouble3& gravity, const unsigned int& edge_num); ~ClothSimulation() = default; using ClothMesh = dtk::dtkStaticTriangleMesh::Ptr; @@ -102,6 +108,7 @@ class ClothSimulation : public Scene { // 重力 dtk::dtkDouble3 _gravity; + SystemParam _param; ClothMesh _cloth_mesh; SphereMesh _sphere_mesh; ClothMassSpring _system; @@ -115,7 +122,7 @@ class ClothSimulation : public Scene { class dtkFactory { public: static dtk::dtkStaticTriangleMesh::Ptr CreateClothMesh(float w, int n); - static dtk::dtkPhysMassSpring::Ptr CreateClothMassSpringSystem(const dtk::dtkStaticTriangleMesh::Ptr& mesh); + static dtk::dtkPhysMassSpring::Ptr CreateClothMassSpringSystem(const dtk::dtkStaticTriangleMesh::Ptr& mesh, const SystemParam& _param); static dtk::dtkPhysMassSpringSolver::Ptr CreateClothMassSpringSolver(const dtk::dtkPhysMassSpring::Ptr& system); static dtk::dtkStaticTriangleMesh::Ptr CreateSphereMesh(dtk::dtkDouble3 center, float radius, int n); }; diff --git a/test/system_test/MassSpring3D/include/Scene.h b/test/system_test/MassSpring3D/include/Scene.h index dbc7a14..f7b691e 100644 --- a/test/system_test/MassSpring3D/include/Scene.h +++ b/test/system_test/MassSpring3D/include/Scene.h @@ -9,10 +9,12 @@ class Scene { SceneVisibility Visibility; public: Scene() : State(SCENE_ACTIVE), Visibility(SCENE_VISIBLE) {}; - Scene(const unsigned int& width, const unsigned int& height) : State(SCENE_PAUSE), Visibility(SCENE_HIDDEN), g_windowWidth(width), g_windowHeight(height) {}; + Scene(const unsigned int& width, const unsigned int& height) : State(SCENE_ACTIVE), Visibility(SCENE_VISIBLE), g_windowWidth(width), g_windowHeight(height) {}; virtual void Init() = 0; virtual void Update(float dt) = 0; virtual void Render() = 0; + virtual void CleanUp() = 0; + virtual void move(const dtk::dtkDouble2& v) = 0; bool IsPause() const { return State == SCENE_PAUSE; diff --git a/test/system_test/MassSpring3D/include/dtkPhysMassSpringSolver.h b/test/system_test/MassSpring3D/include/dtkPhysMassSpringSolver.h index 1e30e73..22087db 100644 --- a/test/system_test/MassSpring3D/include/dtkPhysMassSpringSolver.h +++ b/test/system_test/MassSpring3D/include/dtkPhysMassSpringSolver.h @@ -7,6 +7,9 @@ #include #include +#include +#include + namespace dtk { class dtkPhysMassSpringSolver { private: @@ -49,9 +52,7 @@ namespace dtk { dtkPhysMassSpringSolver(); dtkPhysMassSpringSolver(const dtkPhysMassSpring::Ptr& massSpring); - void localStep(); - void globalStep(); - + void step(bool use_cuda = false); Cholesky _system_matrix; dtkPhysMassSpring::Ptr _system; @@ -66,6 +67,10 @@ namespace dtk { VectorXf _spring_directions; // d, spring directions VectorXf _inertial_term; /**< 惯性项 = M * y, y = (a + 1) * q(n) - a * q(n - 1), a = damp_factor */ + std::vector _rest_lengths; // 存储每个弹簧的初始长度 + std::vector _spring_indices; // 存储每个弹簧的两个顶点的索引(大小为 2 * num_springs) + + float _time_step; }; } diff --git a/test/system_test/MassSpring3D/include/step.cuh b/test/system_test/MassSpring3D/include/step.cuh new file mode 100644 index 0000000..bfcd356 --- /dev/null +++ b/test/system_test/MassSpring3D/include/step.cuh @@ -0,0 +1,9 @@ +#ifndef STEP_CUH +#define STEP_CUH +#include +#include + +extern "C" void run_local_step_kernel(const float* current_state, float* spring_directions, + const float* rest_lengths, const int* spring_indices, + int num_springs); +#endif \ No newline at end of file diff --git a/test/system_test/MassSpring3D/main.cpp b/test/system_test/MassSpring3D/main.cpp index 7fd6c00..abd67a3 100644 --- a/test/system_test/MassSpring3D/main.cpp +++ b/test/system_test/MassSpring3D/main.cpp @@ -29,16 +29,20 @@ #include "Renderer.h" static auto last_clock = std::chrono::high_resolution_clock::now(); -static auto init_clock = std::chrono::high_resolution_clock::now(); // The Width of the screen const unsigned int WINDOW_WIDTH = 800; // The height of the screen const unsigned int WINDOW_HEIGHT = 600; -static ClothSimulation scene(WINDOW_WIDTH, WINDOW_HEIGHT, { 0, -9.8 }); -// static ProgramInput* g_render_target; // vertex, index +Scene* current_scene = nullptr; + +double fps = 0.0; +double total_time = 0.0; +int total_frames = 0; +auto last_time = std::chrono::high_resolution_clock::now(); char INSTRUCTION = '0'; +int I1_EDGE_NUM = 33; static void checkGlErrors(); @@ -78,9 +82,14 @@ void display() { auto now = std::chrono::high_resolution_clock::now(); auto dt = std::chrono::duration_cast>( - now - last_clock) + now - last_time) .count(); - last_clock = now; + last_time = now; + + total_time += dt; + total_frames++; + last_time = now; + double fps = total_frames / total_time; int h = glutGet(GLUT_WINDOW_HEIGHT); int w = glutGet(GLUT_WINDOW_WIDTH); @@ -90,21 +99,19 @@ void display() { draw_text(5, 40, "Push [1-1] to switch scene"); draw_text(w - 150, h - 20, "refer: apollonia"); - if (scene.IsPause()) + draw_text(5, h - 40, "%.2f FPS", fps); + if (current_scene->IsPause()) draw_text(5, h - 20, "dt: %.2f ms PAUSED", dt * 1000); else draw_text(5, h - 20, "dt: %.2f ms", dt * 1000); - scene.Update(std::min(dt, 0.08)); - scene.Render(); - - GLenum err; - while ((err = glGetError()) != GL_NO_ERROR) { - std::cerr << "OpenGL error: " << err << std::endl; - } + current_scene->Update(dt); + current_scene->Render(); glutSwapBuffers(); checkGlErrors(); + + glutPostRedisplay(); } void reshape(int width, int height) { @@ -116,12 +123,12 @@ void reshape(int width, int height) { void mouse(int button, int state, int x, int y) {} -void move_pos(const dtk::dtkDouble2& v) { scene.move(v); } +void move_pos(const dtk::dtkDouble2& v) { current_scene->move(v); } void keyboard(unsigned char key, int x, int y) { switch (key) { case '1': - scene.SetVisible(!scene.IsVisible()); + current_scene = new ClothSimulation(WINDOW_WIDTH, WINDOW_HEIGHT, { 0, -9.8 }, I1_EDGE_NUM); break; case 'w': move_pos(dtk::dtkDouble2(0, 1)); @@ -136,7 +143,7 @@ void keyboard(unsigned char key, int x, int y) { move_pos(dtk::dtkDouble2(1, 0)); break; case ' ': - scene.SetPause(!scene.IsPause()); + current_scene->SetPause(!current_scene->IsPause()); break; case 27: exit(0); @@ -176,10 +183,6 @@ static void initGlutState(int argc, char** argv, const char* window_title = "", static void initGlewState() { GLenum err = glewInit(); - if (!glewIsSupported("GL_VERSION_2_0")) { - printf("OpenGL 2.0 not supported\n"); - exit(1); - } if (err != GLEW_OK) { std::cerr << "Error initializing GLEW: " << glewGetErrorString(err) << std::endl; exit(0); @@ -216,6 +219,12 @@ int main(int argc, char* argv[]) { if (std::strcmp(argv[i], "--instruction") == 0 && i + 1 < argc) { INSTRUCTION = argv[++i][0]; } + + if (INSTRUCTION == '1') { + if (std::strcmp(argv[i], "--edge_num") == 0 && i + 1 < argc) { + I1_EDGE_NUM = std::stoi(argv[++i]); + } + } } try { @@ -224,7 +233,6 @@ int main(int argc, char* argv[]) { initGlewState(); initGLState(); - scene.Init(); checkGlErrors(); if (INSTRUCTION != '0') { @@ -232,7 +240,8 @@ int main(int argc, char* argv[]) { } glutMainLoop(); - scene.CleanUp(); + current_scene->CleanUp(); + delete current_scene; return 0; } catch (const std::runtime_error& e) { diff --git a/test/system_test/MassSpring3D/step.cu b/test/system_test/MassSpring3D/step.cu new file mode 100644 index 0000000..53e9012 --- /dev/null +++ b/test/system_test/MassSpring3D/step.cu @@ -0,0 +1,62 @@ +#include "step.cuh" + +// CUDA 核函数的声明 +__global__ void local_step_kernel(const float* __restrict__ current_state, float* __restrict__ spring_directions, + const float* __restrict__ rest_lengths, const int* __restrict__ spring_indices, + int num_springs); + +// CUDA 调用函数 +extern "C" void run_local_step_kernel(const float* current_state, float* spring_directions, + const float* rest_lengths, const int* spring_indices, + int num_springs) +{ + // 计算 CUDA 核函数需要的线程数和块数 + int blockSize = 256; + int numBlocks = (num_springs + blockSize - 1) / blockSize; + + // 调用 CUDA 核函数 + local_step_kernel<<>>(current_state, spring_directions, rest_lengths, spring_indices, num_springs); + + // 检查 CUDA 错误 + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + fprintf(stderr, "CUDA kernel failed: %s\n", cudaGetErrorString(err)); + } + + // 同步 CUDA 设备 + cudaDeviceSynchronize(); +} + +// CUDA 核函数的定义 +__global__ void local_step_kernel(const float* __restrict__ current_state, float* __restrict__ spring_directions, + const float* __restrict__ rest_lengths, const int* __restrict__ spring_indices, + int num_springs) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_springs) return; // 如果 idx 超过弹簧数量则返回 + + // 获取当前弹簧的两个点的索引 + int id1 = spring_indices[2 * idx]; // 第一个顶点的ID + int id2 = spring_indices[2 * idx + 1]; // 第二个顶点的ID + + // 计算两个点之间的向量 p12 + float p12_x = current_state[3 * id1 + 0] - current_state[3 * id2 + 0]; + float p12_y = current_state[3 * id1 + 1] - current_state[3 * id2 + 1]; + float p12_z = current_state[3 * id1 + 2] - current_state[3 * id2 + 2]; + + // 计算 p12 的长度(归一化向量) + float length = sqrtf(p12_x * p12_x + p12_y * p12_y + p12_z * p12_z); + + if (length > 1e-6) { + // 归一化向量 + p12_x /= length; + p12_y /= length; + p12_z /= length; + + // 使用 rest_length 计算方向 + float rest_length = rest_lengths[idx]; + spring_directions[3 * idx + 0] = rest_length * p12_x; + spring_directions[3 * idx + 1] = rest_length * p12_y; + spring_directions[3 * idx + 2] = rest_length * p12_z; + } +}