diff --git a/csrc/core/math.hpp b/csrc/core/math.hpp
index 80cce89560f70..30632f20b61c0 100644
--- a/csrc/core/math.hpp
+++ b/csrc/core/math.hpp
@@ -6,6 +6,7 @@
 inline uint32_t next_pow_2(uint32_t const num) {
   if (num <= 1) return num;
   return 1 << (CHAR_BIT * sizeof(num) - __builtin_clz(num - 1));
+}
 
 template <typename A, typename B>
 static inline constexpr auto div_ceil(A a, B b) {
diff --git a/csrc/quantization/activation_kernels.cu b/csrc/quantization/activation_kernels.cu
index bac41e65e7b30..33610a898af61 100644
--- a/csrc/quantization/activation_kernels.cu
+++ b/csrc/quantization/activation_kernels.cu
@@ -107,10 +107,9 @@ __global__ void act_and_mul_quant_kernel(
           scale.data_ptr<float>(), d);                              \
         });
 
-void silu_and_mul_quant(torch::Tensor& out,    // [..., d]
-                        torch::Tensor& input,  // [..., 2 * d]
-                        torch::Tensor& scale)
-{
+void silu_and_mul_quant(torch::Tensor& out,    // [..., d]
+                        torch::Tensor& input,  // [..., 2 * d]
+                        torch::Tensor& scale) {
   TORCH_CHECK(out.dtype() == torch::kFloat8_e4m3fn);
   TORCH_CHECK(input.dtype() == torch::kFloat16 ||
               input.dtype() == torch::kBFloat16);
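
For context, here is a minimal host-side sketch of what `silu_and_mul_quant` is expected to compute: SiLU applied to the first half of the last dimension, multiplied elementwise by the second half, then scaled and saturated to the FP8 E4M3 range. This is an illustration only; the helper name `silu_and_mul_quant_ref`, the `FP8_E4M3_MAX` constant, the divide-by-scale convention, and the saturating-float stand-in for a real FP8 conversion are assumptions, not part of the patch.

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Reference semantics (assumed): for each token row,
//   out[i] = quantize(silu(input[i]) * input[d + i]),
// where input has 2 * d columns and out has d columns.
std::vector<float> silu_and_mul_quant_ref(const std::vector<float>& input,
                                          int64_t num_tokens, int64_t d,
                                          float scale) {
  constexpr float FP8_E4M3_MAX = 448.0f;  // largest finite e4m3 value
  std::vector<float> out(num_tokens * d);
  for (int64_t t = 0; t < num_tokens; ++t) {
    for (int64_t i = 0; i < d; ++i) {
      float x = input[t * 2 * d + i];      // gate half
      float y = input[t * 2 * d + d + i];  // up half
      float silu = x / (1.0f + std::exp(-x));
      float v = silu * y / scale;          // assumed scale convention
      // A real kernel would convert to fp8_e4m3 here; we only saturate.
      out[t * d + i] = std::clamp(v, -FP8_E4M3_MAX, FP8_E4M3_MAX);
    }
  }
  return out;
}
```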