Skip to content

Commit

Permalink
Disable sqrt fast math test on CUDA 11.0.
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt committed Aug 17, 2023
1 parent b85130d commit a2d3219
Showing 1 changed file with 15 additions and 12 deletions.
27 changes: 15 additions & 12 deletions test/core/codegen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -158,26 +158,29 @@ end
end

@testset "fastmath" begin
    # Kernel that takes an element-wise square root in place. It carries no
    # `@fastmath` annotation, so any fast-math lowering in its PTX comes solely
    # from the `fastmath=true` compiler option passed to `code_ptx`.
    function sqrt_kernel(x)
        i = threadIdx().x
        @inbounds x[i] = sqrt(x[i])
        return
    end

    # Kernel that takes an element-wise reciprocal in place, with the division
    # explicitly marked `@fastmath`.
    function div_kernel(x)
        i = threadIdx().x
        @fastmath @inbounds x[i] = 1 / x[i]
        return
    end

    # Argument signature shared by both kernels.
    argtypes = Tuple{CuDeviceArray{Float32,1,AS.Global}}

    # Return the PTX generated for `kernel` as a string; keyword arguments
    # (e.g. `fastmath=true`) are forwarded to `CUDA.code_ptx`.
    ptx(kernel; kwargs...) = sprint(io -> CUDA.code_ptx(io, kernel, argtypes; kwargs...))

    asm = ptx(div_kernel; fastmath=true)
    @test occursin("div.approx.ftz", asm)

    # libdevice only contains fast math versions of sqrt for CUDA 11.1+
    # The sqrt checks therefore run only behind this guard, so that they are
    # skipped on CUDA 11.0 instead of failing.
    if CUDA.runtime_version() >= v"11.1"
        asm = ptx(sqrt_kernel)
        @test occursin("sqrt.r", asm)

        asm = ptx(sqrt_kernel; fastmath=true)
        @test occursin("sqrt.approx.ftz", asm)
    end
end

end
Expand Down

0 comments on commit a2d3219

Please sign in to comment.