Skip to content

Commit

Permalink
Add test.
Browse files Browse the repository at this point in the history
  • Loading branch information
Zentrik authored and maleadt committed Aug 17, 2023
1 parent aff8cd3 commit b85130d
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/compiler/execution.jl
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ The following keyword arguments are supported:
supported on LLVM 4.0+)
- `name`: override the name that the kernel will have in the generated code
- `always_inline`: inline all function calls in the kernel
- `fastmath`: use less precise square roots and flush denormals to zero
The output of this function is automatically cached, i.e. you can simply call `cufunction`
in a hot path without degrading performance. New code will be generated automatically, when
Expand Down
23 changes: 23 additions & 0 deletions test/core/codegen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,29 @@ end
@test !occursin(".local", asm)
end

@testset "fastmath" begin
    # Kernel that replaces each element with its square root. It is compiled both
    # with and without the `fastmath` option below.
    function sqrt_kernel(x)
        idx = threadIdx().x
        @inbounds x[idx] = sqrt(x[idx])
        return
    end

    # Kernel that replaces each element with its reciprocal, evaluated under `@fastmath`.
    function div_kernel(x)
        idx = threadIdx().x
        @fastmath @inbounds x[idx] = 1 / x[idx]
        return
    end

    # Argument signature shared by every compilation in this testset.
    argtypes = Tuple{CuDeviceArray{Float32,1,AS.Global}}

    # Return the PTX emitted for `kernel` as a string, forwarding compiler keyword arguments.
    ptx(kernel; kwargs...) = sprint(io -> CUDA.code_ptx(io, kernel, argtypes; kwargs...))

    # Without `fastmath`, the square root should lower to an instruction matching `sqrt.r`.
    @test occursin("sqrt.r", ptx(sqrt_kernel))

    # With `fastmath=true`, the approximate flush-to-zero variants are expected instead.
    @test occursin("sqrt.approx.ftz", ptx(sqrt_kernel; fastmath=true))
    @test occursin("div.approx.ftz", ptx(div_kernel; fastmath=true))
end

end

############################################################################################
Expand Down

0 comments on commit b85130d

Please sign in to comment.