Skip to content

Commit

Permalink
[Kernel] Turn off cutlass scaled_mm for Lovelace
Browse files Browse the repository at this point in the history
  • Loading branch information
tlrmchlsmth committed Jul 12, 2024
1 parent 55f692b commit b9eaa67
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,13 @@ bool cutlass_scaled_mm_supports_fp8(int64_t cuda_device_capability) {
if (cuda_device_capability >= 90) {
return CUDA_VERSION >= 12000;
} else if (cuda_device_capability >= 89) {
return CUDA_VERSION >= 12040;
// CUTLASS kernels have not yet been tuned for Ada Lovelace (capability 89)
// systems and are slower than torch.mm there, so return false unconditionally.
return false;

// Once the CUTLASS kernels have been optimized for Lovelace systems,
// use the following check:
// return CUDA_VERSION >= 12040;
}
#endif

Expand Down Expand Up @@ -98,4 +104,4 @@ void cutlass_scaled_mm(torch::Tensor& c, torch::Tensor const& a,
TORCH_CHECK(version_num >= 75);
cutlass_scaled_mm_sm75(c, a, b, a_scales, b_scales, bias);
}
}
}

0 comments on commit b9eaa67

Please sign in to comment.