Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run Runic after explicit return rule addition #516

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .github/workflows/Check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# CI workflow: verify Julia source formatting with Runic on pushes, tags, and PRs.
name: Check
on:
  push:
    branches:
      # NOTE(review): the PR targets `main` (see PR header) but the original
      # filter listed only 'master' — include both so pushes trigger on either.
      - 'main'
      - 'master'
      # Fixed: 'release-' (no glob) matches only a branch literally named
      # "release-"; 'release-*' matches all release branches as intended.
      - 'release-*'
    tags:
      - '*'
  pull_request:
jobs:
  runic:
    name: Runic formatting
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: julia-actions/setup-julia@v2
        with:
          # Latest stable Julia 1.x release.
          version: "1"
      - uses: julia-actions/cache@v2
      - uses: fredrikekre/runic-action@v1
        with:
          # Runic major version to run.
          version: "1"
25 changes: 0 additions & 25 deletions .github/workflows/runic.yml

This file was deleted.

1 change: 1 addition & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ function main()
push_preview = true,
)
end
return
end

isinteractive() || main()
2 changes: 2 additions & 0 deletions examples/histogram.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,14 @@ function histogram!(histogram_output, input)
# Need static block size
kernel! = histogram_kernel!(backend, (256,))
kernel!(histogram_output, input, ndrange = size(input))
return
end

"""
    move(backend, input)

Allocate an array on `backend` matching `input`'s element type and size,
copy `input`'s contents into it, and return the new array.
"""
function move(backend, input)
    # TODO replace with adapt(backend, input)
    destination = KernelAbstractions.allocate(backend, eltype(input), size(input))
    KernelAbstractions.copyto!(backend, destination, input)
    return destination
end

@testset "histogram tests" begin
Expand Down
1 change: 1 addition & 0 deletions examples/matmul.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ function matmul!(output, a, b)
backend = KernelAbstractions.get_backend(a)
kernel! = matmul_kernel!(backend)
kernel!(output, a, b, ndrange = size(output))
return
end

a = rand!(allocate(backend, Float32, 256, 123))
Expand Down
1 change: 1 addition & 0 deletions examples/memcopy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ function mycopy!(A, B)

kernel = copy_kernel!(backend)
kernel(A, B, ndrange = length(A))
return
end

A = KernelAbstractions.zeros(backend, Float64, 128, 128)
Expand Down
1 change: 1 addition & 0 deletions examples/memcopy_static.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ function mycopy_static!(A, B)

kernel = copy_kernel!(backend, 32, size(A)) # if size(A) varies this will cause recompilation
kernel(A, B, ndrange = size(A))
return
end

A = KernelAbstractions.zeros(backend, Float64, 128, 128)
Expand Down
3 changes: 3 additions & 0 deletions examples/mpi.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ function cooperative_test!(req)
done, _ = MPI.Test(req, MPI.Status)
yield()
end
return
end

function cooperative_wait(task::Task)
Expand All @@ -17,6 +18,7 @@ function cooperative_wait(task::Task)
yield()
end
wait(task)
return
end

function exchange!(h_send_buf, d_recv_buf, h_recv_buf, src_rank, dst_rank, comm)
Expand Down Expand Up @@ -68,6 +70,7 @@ function main(backend)
cooperative_wait(send_task)

@test all(d_recv_buf .== src_rank)
return
end

main(backend)
1 change: 1 addition & 0 deletions examples/naive_transpose.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ function naive_transpose!(a, b)
groupsize = KernelAbstractions.isgpu(backend) ? 256 : 1024
kernel! = naive_transpose_kernel!(backend, groupsize)
kernel!(a, b, ndrange = size(a))
return
end

# resolution of grid will be res*res
Expand Down
4 changes: 2 additions & 2 deletions ext/EnzymeCore07Ext.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ function EnzymeRules.forward(
f = kernel.f
fwd_kernel = similar(kernel, cpu_fwd)

fwd_kernel(f, args...; ndrange, workgroupsize)
return fwd_kernel(f, args...; ndrange, workgroupsize)
end

function EnzymeRules.forward(
Expand All @@ -36,7 +36,7 @@ function EnzymeRules.forward(
f = kernel.f
fwd_kernel = similar(kernel, gpu_fwd)

fwd_kernel(f, args...; ndrange, workgroupsize)
return fwd_kernel(f, args...; ndrange, workgroupsize)
end

_enzyme_mkcontext(kernel::Kernel{CPU}, ndrange, iterspace, dynamic) =
Expand Down
4 changes: 2 additions & 2 deletions ext/EnzymeCore08Ext.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ function EnzymeRules.forward(
f = kernel.f
fwd_kernel = similar(kernel, cpu_fwd)

fwd_kernel(config, f, args...; ndrange, workgroupsize)
return fwd_kernel(config, f, args...; ndrange, workgroupsize)
end

function EnzymeRules.forward(
Expand All @@ -38,7 +38,7 @@ function EnzymeRules.forward(
f = kernel.f
fwd_kernel = similar(kernel, gpu_fwd)

fwd_kernel(config, f, args...; ndrange, workgroupsize)
return fwd_kernel(config, f, args...; ndrange, workgroupsize)
end

_enzyme_mkcontext(kernel::Kernel{CPU}, ndrange, iterspace, dynamic) =
Expand Down
32 changes: 16 additions & 16 deletions src/KernelAbstractions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ synchronize(backend)
```
"""
macro kernel(expr)
__kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false)
return __kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false)
end

"""
Expand All @@ -69,7 +69,7 @@ This allows for two different configurations:
"""
macro kernel(ex...)
if length(ex) == 1
__kernel(ex[1], true, false)
return __kernel(ex[1], true, false)
else
generate_cpu = true
force_inbounds = false
Expand All @@ -89,7 +89,7 @@ macro kernel(ex...)
)
end
end
__kernel(ex[end], generate_cpu, force_inbounds)
return __kernel(ex[end], generate_cpu, force_inbounds)
end
end

Expand Down Expand Up @@ -167,7 +167,7 @@ a tuple corresponding to kernel configuration. In order to get
the total size you can use `prod(@groupsize())`.
"""
macro groupsize()
quote
return quote
$groupsize($(esc(:__ctx__)))
end
end
Expand All @@ -179,7 +179,7 @@ Query the ndrange on the backend. This function returns
a tuple corresponding to kernel configuration.
"""
macro ndrange()
quote
return quote
$size($ndrange($(esc(:__ctx__))))
end
end
Expand All @@ -193,7 +193,7 @@ macro localmem(T, dims)
# Stay in sync with CUDAnative
id = gensym("static_shmem")

quote
return quote
$SharedMemory($(esc(T)), Val($(esc(dims))), Val($(QuoteNode(id))))
end
end
Expand All @@ -214,7 +214,7 @@ macro private(T, dims)
if dims isa Integer
dims = (dims,)
end
quote
return quote
$Scratchpad($(esc(:__ctx__)), $(esc(T)), Val($(esc(dims))))
end
end
Expand All @@ -226,7 +226,7 @@ Creates a private local of `mem` per item in the workgroup. This can be safely u
across [`@synchronize`](@ref) statements.
"""
macro private(expr)
esc(expr)
return esc(expr)
end

"""
Expand All @@ -236,7 +236,7 @@ end
that span workitems, or are reused across `@synchronize` statements.
"""
macro uniform(value)
esc(value)
return esc(value)
end

"""
Expand All @@ -247,7 +247,7 @@ from each thread in the workgroup are visible in from all other threads in the
workgroup.
"""
macro synchronize()
quote
return quote
$__synchronize()
end
end
Expand All @@ -264,7 +264,7 @@ workgroup. `cond` is not allowed to have any visible sideffects.
- `CPU`: This synchronization will always occur.
"""
macro synchronize(cond)
quote
return quote
$(esc(cond)) && $__synchronize()
end
end
Expand All @@ -289,7 +289,7 @@ end
```
"""
macro context()
esc(:(__ctx__))
return esc(:(__ctx__))
end

"""
Expand Down Expand Up @@ -329,7 +329,7 @@ macro print(items...)
end
end

quote
return quote
$__print($(map(esc, args)...))
end
end
Expand Down Expand Up @@ -385,7 +385,7 @@ macro index(locale, args...)
end

index_function = Symbol(:__index_, locale, :_, indexkind)
Expr(:call, GlobalRef(KernelAbstractions, index_function), esc(:__ctx__), map(esc, args)...)
return Expr(:call, GlobalRef(KernelAbstractions, index_function), esc(:__ctx__), map(esc, args)...)
end

###
Expand Down Expand Up @@ -591,7 +591,7 @@ struct Kernel{Backend, WorkgroupSize <: _Size, NDRange <: _Size, Fun}
end

function Base.similar(kernel::Kernel{D, WS, ND}, f::F) where {D, WS, ND, F}
Kernel{D, WS, ND, F}(kernel.backend, f)
return Kernel{D, WS, ND, F}(kernel.backend, f)
end

workgroupsize(::Kernel{D, WorkgroupSize}) where {D, WorkgroupSize} = WorkgroupSize
Expand Down Expand Up @@ -701,7 +701,7 @@ end
push!(args, item)
end

quote
return quote
print($(args...))
end
end
Expand Down
10 changes: 5 additions & 5 deletions src/cpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ function (obj::Kernel{CPU})(args...; ndrange = nothing, workgroupsize = nothing)
return nothing
end

__run(obj, ndrange, iterspace, args, dynamic, obj.backend.static)
return __run(obj, ndrange, iterspace, args, dynamic, obj.backend.static)
end

const CPU_GRAINSIZE = 1024 # Vectorization, 4x unrolling, minimal grain size
Expand Down Expand Up @@ -162,15 +162,15 @@ end

@inline function __index_Global_Linear(ctx, idx::CartesianIndex)
I = @inbounds expand(__iterspace(ctx), __groupindex(ctx), idx)
@inbounds LinearIndices(__ndrange(ctx))[I]
return @inbounds LinearIndices(__ndrange(ctx))[I]
end

@inline function __index_Local_Cartesian(_, idx::CartesianIndex)
return idx
end

@inline function __index_Group_Cartesian(ctx, ::CartesianIndex)
__groupindex(ctx)
return __groupindex(ctx)
end

@inline function __index_Global_Cartesian(ctx, idx::CartesianIndex)
Expand All @@ -191,7 +191,7 @@ end
# CPU implementation of shared memory
###
@inline function SharedMemory(::Type{T}, ::Val{Dims}, ::Val) where {T, Dims}
MArray{__size(Dims), T}(undef)
return MArray{__size(Dims), T}(undef)
end

###
Expand All @@ -212,7 +212,7 @@ end
# https://github.com/JuliaLang/julia/issues/39308
@inline function aview(A, I::Vararg{Any, N}) where {N}
J = Base.to_indices(A, I)
Base.unsafe_view(Base._maybe_reshape_parent(A, Base.index_ndims(J...)), J...)
return Base.unsafe_view(Base._maybe_reshape_parent(A, Base.index_ndims(J...)), J...)
end

@inline function Base.getindex(A::ScratchArray{N}, idx) where {N}
Expand Down
8 changes: 5 additions & 3 deletions src/macros.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ function find_return(stmt)
result |= @capture(expr, return x_)
expr
end
result
return result
end

# XXX: Proper errors
Expand Down Expand Up @@ -103,6 +103,7 @@ function transform_gpu!(def, constargs, force_inbounds)
Expr(:block, let_constargs...),
body,
)
return
end

# The hard case, transform the function for CPU execution
Expand Down Expand Up @@ -137,6 +138,7 @@ function transform_cpu!(def, constargs, force_inbounds)
Expr(:block, let_constargs...),
Expr(:block, new_stmts...),
)
return
end

struct WorkgroupLoop
Expand All @@ -150,7 +152,7 @@ end
is_sync(expr) = @capture(expr, @synchronize() | @synchronize(a_))

function is_scope_construct(expr::Expr)
expr.head === :block # ||
return expr.head === :block # ||
# expr.head === :let
end

Expand All @@ -160,7 +162,7 @@ function find_sync(stmt)
result |= is_sync(expr)
expr
end
result
return result
end

# TODO proper handling of LineInfo
Expand Down
Loading
Loading