Skip to content

Commit

Permalink
Remove redundant inline specifiers; preparing for upstream
Browse files (browse the repository at this point in the history)
  • Loading branch information
mawong-amd committed Mar 27, 2024
1 parent d76bfa2 commit 1ce36d0
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions csrc/layernorm_kernels.cu
Original file line number Diff line number Diff line change
@@ -103,7 +103,7 @@ struct _f16Vec {
using T2 = typename Converter::packed_hip_type;
T1 data[width];

- __device__ inline _f16Vec& operator+=(const _f16Vec<scalar_t, width>& other) {
+ __device__ _f16Vec& operator+=(const _f16Vec<scalar_t, width>& other) {
if constexpr (width % 2 == 0) {
#pragma unroll
for (int i = 0; i < width; i += 2) {
@@ -120,7 +120,7 @@ struct _f16Vec {
return *this;
}

- __device__ inline _f16Vec& operator*=(const _f16Vec<scalar_t, width>& other) {
+ __device__ _f16Vec& operator*=(const _f16Vec<scalar_t, width>& other) {
if constexpr (width % 2 == 0) {
#pragma unroll
for (int i = 0; i < width; i += 2) {
@@ -137,7 +137,7 @@ struct _f16Vec {
return *this;
}

- __device__ inline _f16Vec& operator*=(const float scale) {
+ __device__ _f16Vec& operator*=(const float scale) {
if constexpr (width % 2 == 0) {
#pragma unroll
for (int i = 0; i < width; i += 2) {
@@ -158,7 +158,7 @@ struct _f16Vec {
return *this;
}

- __device__ inline float sum_squares() const {
+ __device__ float sum_squares() const {
float result = 0.0f;
if constexpr (width % 2 == 0) {
#pragma unroll
Expand Down

0 comments on commit 1ce36d0

Please sign in to comment.