Fix: has_inf_or_nan cuda invalid configuration argument
a710128 committed May 1, 2022
1 parent 81f1a03 commit cb06d14
Showing 3 changed files with 3 additions and 1 deletion.
Dockerfile (2 changes: 1 addition & 1 deletion)
@@ -10,7 +10,7 @@ RUN pip3 install torch==1.10.0 -i https://pypi.tuna.tsinghua.edu.cn/simple
 RUN pip3 install numpy -i https://pypi.tuna.tsinghua.edu.cn/simple
 RUN apt install iputils-ping opensm libopensm-dev libibverbs1 libibverbs-dev -y --no-install-recommends
 ENV TORCH_CUDA_ARCH_LIST=6.1;7.0;7.5
-ENV BMP_AVX512=1
+ENV BMT_AVX512=1
 ADD other_requirements.txt other_requirements.txt
 RUN pip3 install --upgrade pip && pip3 install -r other_requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
 ADD . .
csrc/cuda/adam.cu (1 change: 1 addition & 0 deletions)
@@ -50,6 +50,7 @@ void adam_launcher(
     float bias_correction2
 ) {
     int32_t n = param_fp32.numel();
+    if (n <= 0) return;
     auto g_ptr = reinterpret_cast<half*>(g_fp16.data_ptr<at::Half>());
     auto m_ptr = reinterpret_cast<half*>(m_fp16.data_ptr<at::Half>());
     auto v_ptr = v_fp32.data_ptr<float>();
csrc/cuda/has_inf_nan.cu (1 change: 1 addition & 0 deletions)
@@ -73,6 +73,7 @@ void has_nan_inf_launcher(
     torch::Tensor out
 ) {
     int n = g_fp16.numel();
+    if (n <= 0) return;
     auto g_ptr = reinterpret_cast<half*>(g_fp16.data_ptr<at::Half>());
     auto mid_ptr = mid.data_ptr<uint8_t>();
     auto stream = at::cuda::getCurrentCUDAStream();
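
For context, here is a minimal standalone sketch of why the added early return matters. It is not BMTrain's actual kernel code (dummy_scan and launcher below are hypothetical names), and it assumes, as the commit title suggests, that the launchers derive their grid size from n further down in these functions: a zero-element tensor then yields a launch with 0 blocks, which CUDA rejects with cudaErrorInvalidConfiguration ("invalid configuration argument"). Returning early when n <= 0 skips the launch entirely.

#include <cstdio>
#include <cuda_runtime.h>

__global__ void dummy_scan(const float *g, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) { /* inspect g[i] here */ }
}

void launcher(const float *g, int n) {
    if (n <= 0) return;                              // the fix: nothing to scan, skip the launch
    const int threads = 1024;
    const int blocks = (n + threads - 1) / threads;  // would be 0 when n == 0
    dummy_scan<<<blocks, threads>>>(g, n);
}

int main() {
    // Reproduce the reported failure: a launch with a grid of 0 blocks.
    dummy_scan<<<0, 1024>>>(nullptr, 0);
    printf("0-block launch: %s\n", cudaGetErrorString(cudaGetLastError()));
    // prints: 0-block launch: invalid configuration argument

    // With the guard, nothing is launched and no error is recorded.
    launcher(nullptr, 0);
    printf("guarded call:   %s\n", cudaGetErrorString(cudaGetLastError()));
    // prints: guarded call:   no error
    return 0;
}

The same guard is added to both adam_launcher and has_nan_inf_launcher above, so empty tensors are handled before any pointer setup or kernel launch.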
