
Disable fp8 kv cache for lovelace (#520)
tgaddair authored Jun 18, 2024
1 parent 559fc3b commit 49bb52f
Showing 1 changed file with 2 additions and 1 deletion.
server/lorax_server/utils/paged_attention.py (2 additions & 1 deletion)
@@ -15,7 +15,8 @@
 )
 
 if torch.cuda.is_available():
-    fp8_supported = torch.cuda.get_device_capability()[0] >= 9 or (torch.cuda.get_device_capability()[0] == 8 and torch.cuda.get_device_capability()[1] >= 9)
+    # TODO(travis): fix for CUDA 8.9 (Lovelace)
+    fp8_supported = torch.cuda.get_device_capability()[0] >= 9 #or (torch.cuda.get_device_capability()[0] == 8 and torch.cuda.get_device_capability()[1] >= 9)
 else:
     fp8_supported = False

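For context, torch.cuda.get_device_capability() returns a (major, minor) compute-capability tuple: Ada Lovelace is (8, 9) and Hopper is (9, 0), so commenting out the second clause restricts fp8 KV cache support to Hopper and newer. A minimal sketch of the resulting gate as a standalone helper (the function name fp8_kv_cache_supported is illustrative, not from the repo):

import torch

def fp8_kv_cache_supported() -> bool:
    # Illustrative helper, not part of the commit: mirrors the gate above.
    # Only compute capability 9.0+ (Hopper and newer) reports fp8 support;
    # Lovelace (8, 9) stays excluded until the TODO above is resolved.
    if not torch.cuda.is_available():
        return False
    major, minor = torch.cuda.get_device_capability()
    return major >= 9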
