Skip to content

Commit

Permalink
to: use cuda
Browse files Browse the repository at this point in the history
Squashed commit of the following:

commit d715c1022a94c154263c4ec02382dbe5134e9c2d
Author: Chance.H <[email protected]>
Date:   Mon Sep 23 13:13:58 2024 +0800

    to: use cuda

commit 9a90661bbaf615eedd9f16f0b6deca5b0d0c9759
Author: Chance.H <[email protected]>
Date:   Mon Sep 23 10:56:48 2024 +0800

    to: init cuda set

commit 8f6366a25ab5a5bae10405dd18c9d9fe72151bc4
Author: Chance.H <[email protected]>
Date:   Mon Sep 23 10:43:44 2024 +0800

    to: fix scene
  • Loading branch information
For-Chance committed Sep 23, 2024
1 parent 8cdb56f commit 95f5507
Show file tree
Hide file tree
Showing 9 changed files with 324 additions and 118 deletions.
6 changes: 5 additions & 1 deletion test/system_test/MassSpring3D/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,17 @@ add_executable(${PROJECT_NAME}
Shader.cpp
Renderer.cpp
dtkPhysMassSpringSolver.cpp
step.cu
)

target_include_directories(${PROJECT_NAME} PRIVATE
include
)

target_link_libraries(${PROJECT_NAME} GLEW::GLEW)
# find_package(CUDA REQUIRED)
find_package(CUDAToolkit)

target_link_libraries(${PROJECT_NAME} GLEW::GLEW CUDA::cudart CUDA::cusparse)

if (APPLE)
target_compile_definitions(${PROJECT_NAME} PRIVATE
Expand Down
52 changes: 28 additions & 24 deletions test/system_test/MassSpring3D/ClothSimulation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
*/
#include "ClothSimulation.h"

ClothSimulation::ClothSimulation(const unsigned int& windowWidth, const unsigned int& windowHeight, const dtk::dtkDouble3& gravity) : Scene(windowWidth, windowHeight), _gravity(gravity) {
ClothSimulation::ClothSimulation(const unsigned int& windowWidth, const unsigned int& windowHeight, const dtk::dtkDouble3& gravity, const unsigned int& edge_num) : Scene(windowWidth, windowHeight), _gravity(gravity), _param(SystemParam(edge_num)) {
this->Init();
};

const ClothSimulation::ClothMesh ClothSimulation::GetClothMesh() const {
Expand Down Expand Up @@ -90,7 +91,7 @@ void ClothSimulation::InitShader() {
}

void ClothSimulation::InitCloth() {
_cloth_mesh = dtkFactory::CreateClothMesh(SystemParam::w, SystemParam::n);
_cloth_mesh = dtkFactory::CreateClothMesh(_param.w, _param.n);

ClothDrop();

Expand All @@ -107,7 +108,7 @@ void ClothSimulation::InitScene() {
glm::vec3(0.618, -0.786, 0.3f) * g_camera_distance,
glm::vec3(0.0f, 0.0f, -1.0f),
glm::vec3(0.0f, 0.0f, 1.0f)
) * glm::translate(glm::mat4(1), glm::vec3(0.0f, 0.0f, SystemParam::w / 4));
) * glm::translate(glm::mat4(1), glm::vec3(0.0f, 0.0f, _param.w / 4));
g_ProjectionMatrix = glm::perspective(PI / 4.0f, g_windowWidth * 1.0f / g_windowHeight, 0.01f, 1000.0f);
};

Expand All @@ -116,7 +117,7 @@ void ClothSimulation::SetParameters() {
};

void ClothSimulation::ClothDrop() {
_system = dtkFactory::CreateClothMassSpringSystem(_cloth_mesh);
_system = dtkFactory::CreateClothMassSpringSystem(_cloth_mesh, _param);
// _sphere_mesh = dtkFactory::CreateSphereMesh(dtk::dtkDouble3(0, 0, -1), 0.64, 20);

_solver = dtkFactory::CreateClothMassSpringSolver(_system);
Expand Down Expand Up @@ -161,6 +162,9 @@ void ClothSimulation::UpdateRenderTarget() {
};

dtk::dtkStaticTriangleMesh::Ptr dtkFactory::CreateClothMesh(float w, int n) {
if (n % 2 == 0)
throw std::runtime_error("n must be odd!");

dtk::dtkStaticTriangleMesh::Ptr result = dtk::dtkStaticTriangleMesh::New();

unsigned int idx = 0; // vertex index
Expand Down Expand Up @@ -245,70 +249,70 @@ dtk::dtkStaticTriangleMesh::Ptr dtkFactory::CreateSphereMesh(dtk::dtkDouble3 cen
return result;
}

dtk::dtkPhysMassSpring::Ptr dtkFactory::CreateClothMassSpringSystem(const dtk::dtkStaticTriangleMesh::Ptr& mesh) {
dtk::dtkDouble3 gravity(0, 0, -SystemParam::g);
dtk::dtkPhysMassSpring::Ptr system = dtk::dtkPhysMassSpring::New(SystemParam::m, SystemParam::k, SystemParam::b, SystemParam::a, SystemParam::r, SystemParam::h, gravity);
dtk::dtkPhysMassSpring::Ptr dtkFactory::CreateClothMassSpringSystem(const dtk::dtkStaticTriangleMesh::Ptr& mesh, const SystemParam& _param) {
dtk::dtkDouble3 gravity(0, 0, -_param.g);
dtk::dtkPhysMassSpring::Ptr system = dtk::dtkPhysMassSpring::New(_param.m, _param.k, _param.b, _param.a, _param.r, _param.h, gravity);
// system->SetTriangleMesh(mesh);

// n must be odd
assert(SystemParam::n % 2 == 1);
assert(_param.n % 2 == 1);

// compute n_points and n_springs
unsigned int n_points = SystemParam::n * SystemParam::n;
unsigned int n_springs = (SystemParam::n - 1) * (5 * SystemParam::n - 2);
unsigned int n_points = _param.n * _param.n;
unsigned int n_springs = (_param.n - 1) * (5 * _param.n - 2);
system->SetPoints(mesh->GetPoints());
for (unsigned int id = 0; id < n_points; id++) {
system->AddMassPoint(id, SystemParam::m, dtk::dtkT3<double>(0, 0, 0), SystemParam::a, SystemParam::c, gravity);
system->AddMassPoint(id, _param.m, dtk::dtkT3<double>(0, 0, 0), _param.a, _param.c, gravity);
}

unsigned int n = SystemParam::n;
unsigned int n = _param.n;
double rest_length_factor = 1.05;
for (unsigned int i = 0; i < SystemParam::n; i++) {
for (unsigned int j = 0; j < SystemParam::n; j++) {
for (unsigned int i = 0; i < _param.n; i++) {
for (unsigned int j = 0; j < _param.n; j++) {
// bottom right corner
if (i == n - 1 && j == n - 1) {
continue;
}

if (i == n - 1) {
// structural spring
system->AddSpring(n * i + j, n * i + j + 1, SystemParam::k, SystemParam::a, rest_length_factor);
system->AddSpring(n * i + j, n * i + j + 1, _param.k, _param.a, rest_length_factor);

// bending spring
if (j % 2 == 0) {
system->AddSpring(n * i + j, n * i + j + 2, SystemParam::k, SystemParam::a, rest_length_factor);
system->AddSpring(n * i + j, n * i + j + 2, _param.k, _param.a, rest_length_factor);
}
continue;
}

// right edge
if (j == n - 1) {
// structural spring
system->AddSpring(n * i + j, n * (i + 1) + j, SystemParam::k, SystemParam::a, rest_length_factor);
system->AddSpring(n * i + j, n * (i + 1) + j, _param.k, _param.a, rest_length_factor);

// bending spring
if (i % 2 == 0) {
system->AddSpring(n * i + j, n * (i + 2) + j, SystemParam::k, SystemParam::a, rest_length_factor);
system->AddSpring(n * i + j, n * (i + 2) + j, _param.k, _param.a, rest_length_factor);
}
continue;
}

// structural springs
system->AddSpring(n * i + j, n * i + j + 1, SystemParam::k, SystemParam::a, rest_length_factor);
system->AddSpring(n * i + j, n * i + j + 1, _param.k, _param.a, rest_length_factor);

system->AddSpring(n * i + j, n * (i + 1) + j, SystemParam::k, SystemParam::a, rest_length_factor);
system->AddSpring(n * i + j, n * (i + 1) + j, _param.k, _param.a, rest_length_factor);

// shearing springs
system->AddSpring(n * i + j, n * (i + 1) + j + 1, SystemParam::k, SystemParam::a, rest_length_factor);
system->AddSpring(n * i + j, n * (i + 1) + j + 1, _param.k, _param.a, rest_length_factor);

system->AddSpring(n * (i + 1) + j, n * i + j + 1, SystemParam::k, SystemParam::a, rest_length_factor);
system->AddSpring(n * (i + 1) + j, n * i + j + 1, _param.k, _param.a, rest_length_factor);

// bending springs
if (j % 2 == 0) {
system->AddSpring(n * i + j, n * i + j + 2, SystemParam::k, SystemParam::a, rest_length_factor);
system->AddSpring(n * i + j, n * i + j + 2, _param.k, _param.a, rest_length_factor);
}
if (i % 2 == 0) {
system->AddSpring(n * i + j, n * (i + 2) + j, SystemParam::k, SystemParam::a, rest_length_factor);
system->AddSpring(n * i + j, n * (i + 2) + j, _param.k, _param.a, rest_length_factor);
}
}
}
Expand Down
210 changes: 157 additions & 53 deletions test/system_test/MassSpring3D/dtkPhysMassSpringSolver.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "dtkPhysMassSpringSolver.h"
#include "step.cuh"

dtk::dtkPhysMassSpringSolver::dtkPhysMassSpringSolver() {
}
Expand Down Expand Up @@ -69,6 +70,18 @@ dtk::dtkPhysMassSpringSolver::dtkPhysMassSpringSolver(const dtk::dtkPhysMassSpri
_system_matrix.compute(A);


int num_springs = _system->GetNumberOfSprings();
_rest_lengths.resize(num_springs);
_spring_indices.resize(2 * num_springs);

// 初始化弹簧数据
for (int i = 0; i < num_springs; ++i) {
dtk::dtkPhysSpring* spring = _system->GetSpring(i);
_rest_lengths[i] = spring->GetRestLength();
_spring_indices[2 * i] = spring->GetFirstVertex()->GetPointID();
_spring_indices[2 * i + 1] = spring->GetSecondVertex()->GetPointID();
}

// std::cout << "M: " << std::endl;
// printSparseMatrix(_M);
// std::cout << "L: " << std::endl;
Expand All @@ -86,68 +99,159 @@ void dtk::dtkPhysMassSpringSolver::solve(unsigned int iter_num) {
_inertial_term = _M * ((damping_factor + 1) * (_current_state)-damping_factor * _prev_state);
_prev_state = _current_state;

// std::cout << "damping_factor: " << damping_factor << std::endl;
// std::cout << "M: " << std::endl;
// printSparseMatrix(_M);
// std::cout << "_current_state: " << std::endl << _current_state << std::endl;
// std::cout << "_prev_state: " << std::endl << _prev_state << std::endl;
// std::cout << "_internal_term: " << std::endl << _inertial_term << std::endl;

// perform steps
bool use_cuda = true;
for (unsigned int i = 0; i < iter_num; i++) {
localStep();
globalStep();
step(use_cuda);
}

// std::cout << "_current_state: " << std::endl << _current_state << std::endl;
}

void dtk::dtkPhysMassSpringSolver::localStep() {
for (dtk::dtkID id = 0;id < _system->GetNumberOfSprings();id++) {
dtk::dtkPhysSpring* spring = _system->GetSpring(id);
dtk::dtkID id1 = spring->GetFirstVertex()->GetPointID();
dtk::dtkID id2 = spring->GetSecondVertex()->GetPointID();
double rest_length = spring->GetRestLength();
Vector3f p12(
_current_state[3 * id1 + 0] - _current_state[3 * id2 + 0],
_current_state[3 * id1 + 1] - _current_state[3 * id2 + 1],
_current_state[3 * id1 + 2] - _current_state[3 * id2 + 2]
);

p12.normalize();
_spring_directions[3 * id + 0] = rest_length * p12[0];
_spring_directions[3 * id + 1] = rest_length * p12[1];
_spring_directions[3 * id + 2] = rest_length * p12[2];
extern "C" void run_local_step_kernel(const float* current_state, float* spring_directions, const float* rest_lengths, const int* spring_indices, int num_springs);

void dtk::dtkPhysMassSpringSolver::step(bool use_cuda) {
if (use_cuda) {
// ***** CUDA Setup ***** //
cusparseHandle_t handle;
cusparseCreate(&handle);

int num_springs = _system->GetNumberOfSprings();
int num_points = _system->GetNumberOfMassPoints();
float h2 = _system->GetTimeStep() * _system->GetTimeStep();

// 分配 GPU 内存
float* d_current_state, * d_spring_directions, * d_inertial_term, * d_fext_force, * d_b;
float* d_rest_lengths;
int* d_spring_indices;

cudaMalloc(&d_current_state, _current_state.size() * sizeof(float));
cudaMalloc(&d_spring_directions, _spring_directions.size() * sizeof(float));
cudaMalloc(&d_inertial_term, _inertial_term.size() * sizeof(float));
cudaMalloc(&d_fext_force, num_points * 3 * sizeof(float)); // Assuming 3D points
cudaMalloc(&d_b, _inertial_term.size() * sizeof(float));
cudaMalloc(&d_rest_lengths, num_springs * sizeof(float));
cudaMalloc(&d_spring_indices, 2 * num_springs * sizeof(int)); // 每个弹簧有两个顶点

// 将数据从 CPU 拷贝到 GPU
cudaMemcpy(d_current_state, _current_state.data(), _current_state.size() * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_rest_lengths, _rest_lengths.data(), num_springs * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_spring_indices, _spring_indices.data(), 2 * num_springs * sizeof(int), cudaMemcpyHostToDevice);

// 调用 CUDA 函数执行 local step
run_local_step_kernel(d_current_state, d_spring_directions, d_rest_lengths, d_spring_indices, num_springs);

// 拷贝结果回 CPU
cudaMemcpy(_spring_directions.data(), d_spring_directions, _spring_directions.size() * sizeof(float), cudaMemcpyDeviceToHost);

// ***** 外力计算 ***** //
dtk::dtkDouble3 fext = _system->GetDefaultGravityAccel();
Vector3f fext_vector(fext.x, fext.y, fext.z);
VectorXf fext_force = fext_vector.replicate(num_points, 1);
cudaMemcpy(d_fext_force, fext_force.data(), num_points * 3 * sizeof(float), cudaMemcpyHostToDevice);

// ***** Right Hand Side (RHS) 的计算 ***** //
VectorXf rhs = _inertial_term + h2 * _J * _spring_directions + h2 * fext_force;
cudaMemcpy(d_b, rhs.data(), rhs.size() * sizeof(float), cudaMemcpyHostToDevice);

// ***** cuSPARSE 矩阵求解 ***** //
Eigen::SparseMatrix<float> system_matrix_eigen = _system_matrix.matrixL(); // 获取分解的下三角矩阵
int nnz = system_matrix_eigen.nonZeros();
int rows = system_matrix_eigen.rows();
int cols = system_matrix_eigen.cols();

// 生成 CSR 格式的数据
std::vector<int> csrRowPtr(rows + 1), csrColInd(nnz);
std::vector<float> csrVal(nnz);
int idx = 0;
for (int k = 0; k < system_matrix_eigen.outerSize(); ++k) {
csrRowPtr[k] = idx;
for (Eigen::SparseMatrix<float>::InnerIterator it(system_matrix_eigen, k); it; ++it) {
csrVal[idx] = it.value();
csrColInd[idx] = it.col();
idx++;
}
}
csrRowPtr[rows] = nnz;

// 在 GPU 上分配 CSR 矩阵
int* d_csrRowPtr, * d_csrColInd;
float* d_csrVal;
cudaMalloc(&d_csrRowPtr, (rows + 1) * sizeof(int));
cudaMalloc(&d_csrColInd, nnz * sizeof(int));
cudaMalloc(&d_csrVal, nnz * sizeof(float));
cudaMemcpy(d_csrRowPtr, csrRowPtr.data(), (rows + 1) * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(d_csrColInd, csrColInd.data(), nnz * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(d_csrVal, csrVal.data(), nnz * sizeof(float), cudaMemcpyHostToDevice);

// 使用 cuSPARSE 进行求解
float* d_x;
cudaMalloc(&d_x, rhs.size() * sizeof(float));

cusparseMatDescr_t descr;
cusparseCreateMatDescr(&descr);

// 定义 alpha 和 beta
float alpha = 1.0f;
float beta = 0.0f;

// 使用 cuSPARSE API 进行矩阵-向量乘法
cusparseSpMatDescr_t matA;
cusparseDnVecDescr_t vecX, vecY;
cusparseCreateCsr(&matA, rows, cols, nnz, d_csrRowPtr, d_csrColInd, d_csrVal,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
cusparseCreateDnVec(&vecX, rhs.size(), d_b, CUDA_R_32F);
cusparseCreateDnVec(&vecY, rhs.size(), d_x, CUDA_R_32F);

cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, vecX, &beta, vecY,
CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, nullptr);

// 将解拷贝回 CPU
cudaMemcpy(_current_state.data(), d_x, rhs.size() * sizeof(float), cudaMemcpyDeviceToHost);

// 释放 GPU 资源
cudaFree(d_current_state);
cudaFree(d_spring_directions);
cudaFree(d_inertial_term);
cudaFree(d_fext_force);
cudaFree(d_b);
cudaFree(d_x);
cudaFree(d_csrRowPtr);
cudaFree(d_csrColInd);
cudaFree(d_csrVal);
cudaFree(d_rest_lengths);
cudaFree(d_spring_indices);
cusparseDestroyMatDescr(descr);
cusparseDestroy(handle);
}
// std::cout << "_spring_directions: " << std::endl << _spring_directions << std::endl;
}
else {
// 原有 CPU 代码
for (dtk::dtkID id = 0; id < _system->GetNumberOfSprings(); id++) {
dtk::dtkPhysSpring* spring = _system->GetSpring(id);
dtk::dtkID id1 = spring->GetFirstVertex()->GetPointID();
dtk::dtkID id2 = spring->GetSecondVertex()->GetPointID();
double rest_length = spring->GetRestLength();
Vector3f p12(
_current_state[3 * id1 + 0] - _current_state[3 * id2 + 0],
_current_state[3 * id1 + 1] - _current_state[3 * id2 + 1],
_current_state[3 * id1 + 2] - _current_state[3 * id2 + 2]
);

void dtk::dtkPhysMassSpringSolver::globalStep() {
float h2 = _system->GetTimeStep() * _system->GetTimeStep(); // shorthand

// compute right hand side
// dtk::dtkDouble3 fext(0, 0, 0);
dtk::dtkDouble3 fext = _system->GetDefaultGravityAccel(); // TODO: change to _system->GetImpulseForce()
VectorXf fext_force = VectorXf(Vector3f(fext.x, fext.y, fext.z).replicate(_system->GetNumberOfMassPoints(), 1));

// // 生成随机外力
// std::random_device rd;
// std::mt19937 gen(rd());
// std::uniform_real_distribution<> dis(-1.0, 1.0);
// dtk::dtkDouble3 random_force(dis(gen), dis(gen), dis(gen));
// // 随机选择一个质点
// std::uniform_int_distribution<> point_dis(0, _system->GetNumberOfMassPoints() - 1);
// int random_point = point_dis(gen);
// fext_force.segment<3>(random_point * 3) += Vector3f(random_force.x, random_force.y, random_force.z);

VectorXf b = _inertial_term + h2 * _J * _spring_directions + h2 * fext_force;
// std::cout << "b: " << std::endl << b << std::endl;

// solve system and update state
_current_state = _system_matrix.solve(b);
// std::cout << "_current_state: " << std::endl << _current_state << std::endl;
p12.normalize();
_spring_directions[3 * id + 0] = rest_length * p12[0];
_spring_directions[3 * id + 1] = rest_length * p12[1];
_spring_directions[3 * id + 2] = rest_length * p12[2];
}

float h2 = _system->GetTimeStep() * _system->GetTimeStep();
dtk::dtkDouble3 fext = _system->GetDefaultGravityAccel();
VectorXf fext_force = VectorXf(Vector3f(fext.x, fext.y, fext.z).replicate(_system->GetNumberOfMassPoints(), 1));

VectorXf b = _inertial_term + h2 * _J * _spring_directions + h2 * fext_force;
_current_state = _system_matrix.solve(b);
}
}


void dtk::dtkPhysMassSpringSolver::satisfy(ClothDropType type) {
if (type == Sphere) {
const float radius = 0.64f;
Expand Down
Loading

0 comments on commit 95f5507

Please sign in to comment.