diff --git a/.github/workflows/Check.yml b/.github/workflows/Check.yml
new file mode 100644
index 00000000..e95e67d2
--- /dev/null
+++ b/.github/workflows/Check.yml
@@ -0,0 +1,22 @@
+name: Check
+on:
+  push:
+    branches:
+      - 'master'
+      - 'release-*'  # glob needed: a bare 'release-' matches only a branch literally named "release-"
+    tags:
+      - '*'
+  pull_request:
+jobs:
+  runic:
+    name: Runic formatting
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
+        with:
+          version: "1"
+      - uses: julia-actions/cache@v2
+      - uses: fredrikekre/runic-action@v1
+        with:
+          version: "1"
diff --git a/.github/workflows/runic.yml b/.github/workflows/runic.yml
deleted file mode 100644
index d512f9de..00000000
--- a/.github/workflows/runic.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-name: Runic formatting
-on:
-  push:
-    branches:
-      - 'master'
-      - 'release-'
-    tags:
-      - '*'
-  pull_request:
-jobs:
-  runic:
-    name: Runic
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - uses: julia-actions/setup-julia@v2
-        with:
-          version: "nightly" # Only nightly have the -m flag currently
-      - uses: julia-actions/cache@v2
-      - name: Install Runic
-        run: |
-          julia --color=yes --project=@runic -e 'using Pkg; Pkg.add(url = "https://github.com/fredrikekre/Runic.jl", rev = "e128bc9b77ea44b8fe23a0c3afe741a19c71a7b2")'
-      - name: Run Runic
-        run: |
-          git ls-files -z -- '*.jl' | xargs -0 julia --project=@runic -m Runic --check --diff
diff --git a/docs/make.jl b/docs/make.jl
index 61c50655..e51966a7 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -44,6 +44,7 @@ function main()
             push_preview = true,
         )
     end
+    return
 end
 
 isinteractive() || main()
diff --git a/examples/histogram.jl b/examples/histogram.jl
index 311cff76..958fa0e1 100644
--- a/examples/histogram.jl
+++ b/examples/histogram.jl
@@ -63,12 +63,14 @@ function histogram!(histogram_output, input)
     # Need static block size
     kernel! = histogram_kernel!(backend, (256,))
     kernel!(histogram_output, input, ndrange = size(input))
+    return
 end
 
 function move(backend, input)
     # TODO replace with adapt(backend, input)
     out = KernelAbstractions.allocate(backend, eltype(input), size(input))
     KernelAbstractions.copyto!(backend, out, input)
+    return out
 end
 
 @testset "histogram tests" begin
diff --git a/examples/matmul.jl b/examples/matmul.jl
index 4ade3f37..aec4fb2b 100644
--- a/examples/matmul.jl
+++ b/examples/matmul.jl
@@ -23,6 +23,7 @@ function matmul!(output, a, b)
     backend = KernelAbstractions.get_backend(a)
     kernel! = matmul_kernel!(backend)
     kernel!(output, a, b, ndrange = size(output))
+    return
 end
 
 a = rand!(allocate(backend, Float32, 256, 123))
diff --git a/examples/memcopy.jl b/examples/memcopy.jl
index f826caec..3159f367 100644
--- a/examples/memcopy.jl
+++ b/examples/memcopy.jl
@@ -13,6 +13,7 @@ function mycopy!(A, B)
 
     kernel = copy_kernel!(backend)
     kernel(A, B, ndrange = length(A))
+    return
 end
 
 A = KernelAbstractions.zeros(backend, Float64, 128, 128)
diff --git a/examples/memcopy_static.jl b/examples/memcopy_static.jl
index e3239081..9f088294 100644
--- a/examples/memcopy_static.jl
+++ b/examples/memcopy_static.jl
@@ -13,6 +13,7 @@ function mycopy_static!(A, B)
 
     kernel = copy_kernel!(backend, 32, size(A)) # if size(A) varies this will cause recompilation
     kernel(A, B, ndrange = size(A))
+    return
 end
 
 A = KernelAbstractions.zeros(backend, Float64, 128, 128)
diff --git a/examples/mpi.jl b/examples/mpi.jl
index 2f5e690d..aad02536 100644
--- a/examples/mpi.jl
+++ b/examples/mpi.jl
@@ -9,6 +9,7 @@ function cooperative_test!(req)
         done, _ = MPI.Test(req, MPI.Status)
         yield()
     end
+    return
 end
 
 function cooperative_wait(task::Task)
@@ -17,6 +18,7 @@ function cooperative_wait(task::Task)
         yield()
     end
     wait(task)
+    return
 end
 
 function exchange!(h_send_buf, d_recv_buf, h_recv_buf, src_rank, dst_rank, comm)
@@ -68,6 +70,7 @@ function main(backend)
     cooperative_wait(send_task)
 
     @test all(d_recv_buf .== src_rank)
+    return
 end
 
 main(backend)
diff --git a/examples/naive_transpose.jl b/examples/naive_transpose.jl
index 20ea6a0c..dc6908ae 100644
--- a/examples/naive_transpose.jl
+++ b/examples/naive_transpose.jl
@@ -18,6 +18,7 @@ function naive_transpose!(a, b)
     groupsize = KernelAbstractions.isgpu(backend) ? 256 : 1024
     kernel! = naive_transpose_kernel!(backend, groupsize)
     kernel!(a, b, ndrange = size(a))
+    return
 end
 
 # resolution of grid will be res*res
diff --git a/ext/EnzymeCore07Ext.jl b/ext/EnzymeCore07Ext.jl
index 1998b020..93159886 100644
--- a/ext/EnzymeCore07Ext.jl
+++ b/ext/EnzymeCore07Ext.jl
@@ -22,7 +22,7 @@ function EnzymeRules.forward(
     f = kernel.f
     fwd_kernel = similar(kernel, cpu_fwd)
 
-    fwd_kernel(f, args...; ndrange, workgroupsize)
+    return fwd_kernel(f, args...; ndrange, workgroupsize)
 end
 
 function EnzymeRules.forward(
@@ -36,7 +36,7 @@ function EnzymeRules.forward(
     f = kernel.f
     fwd_kernel = similar(kernel, gpu_fwd)
 
-    fwd_kernel(f, args...; ndrange, workgroupsize)
+    return fwd_kernel(f, args...; ndrange, workgroupsize)
 end
 
 _enzyme_mkcontext(kernel::Kernel{CPU}, ndrange, iterspace, dynamic) =
diff --git a/ext/EnzymeCore08Ext.jl b/ext/EnzymeCore08Ext.jl
index 8bdd8ee4..1fda8512 100644
--- a/ext/EnzymeCore08Ext.jl
+++ b/ext/EnzymeCore08Ext.jl
@@ -23,7 +23,7 @@ function EnzymeRules.forward(
     f = kernel.f
     fwd_kernel = similar(kernel, cpu_fwd)
 
-    fwd_kernel(config, f, args...; ndrange, workgroupsize)
+    return fwd_kernel(config, f, args...; ndrange, workgroupsize)
 end
 
 function EnzymeRules.forward(
@@ -38,7 +38,7 @@ function EnzymeRules.forward(
     f = kernel.f
     fwd_kernel = similar(kernel, gpu_fwd)
 
-    fwd_kernel(config, f, args...; ndrange, workgroupsize)
+    return fwd_kernel(config, f, args...; ndrange, workgroupsize)
 end
 
 _enzyme_mkcontext(kernel::Kernel{CPU}, ndrange, iterspace, dynamic) =
diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl
index 741ac12f..13af92da 100644
--- a/src/KernelAbstractions.jl
+++ b/src/KernelAbstractions.jl
@@ -51,7 +51,7 @@ synchronize(backend)
 ```
 """
 macro kernel(expr)
-    __kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false)
+    return __kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false)
 end
 
 """
@@ -69,7 +69,7 @@ This allows for two different configurations:
 """
 macro kernel(ex...)
     if length(ex) == 1
-        __kernel(ex[1], true, false)
+        return __kernel(ex[1], true, false)
     else
         generate_cpu = true
         force_inbounds = false
@@ -89,7 +89,7 @@ macro kernel(ex...)
                 )
             end
         end
-        __kernel(ex[end], generate_cpu, force_inbounds)
+        return __kernel(ex[end], generate_cpu, force_inbounds)
     end
 end
 
@@ -167,7 +167,7 @@ a tuple corresponding to kernel configuration. In order to get
 the total size you can use `prod(@groupsize())`.
 """
 macro groupsize()
-    quote
+    return quote
         $groupsize($(esc(:__ctx__)))
     end
 end
@@ -179,7 +179,7 @@ Query the ndrange on the backend. This function returns
 a tuple corresponding to kernel configuration.
 """
 macro ndrange()
-    quote
+    return quote
         $size($ndrange($(esc(:__ctx__))))
     end
 end
@@ -193,7 +193,7 @@ macro localmem(T, dims)
     # Stay in sync with CUDAnative
     id = gensym("static_shmem")
 
-    quote
+    return quote
         $SharedMemory($(esc(T)), Val($(esc(dims))), Val($(QuoteNode(id))))
     end
 end
@@ -214,7 +214,7 @@ macro private(T, dims)
     if dims isa Integer
         dims = (dims,)
     end
-    quote
+    return quote
         $Scratchpad($(esc(:__ctx__)), $(esc(T)), Val($(esc(dims))))
     end
 end
@@ -226,7 +226,7 @@ Creates a private local of `mem` per item in the workgroup. This can be safely u
 across [`@synchronize`](@ref) statements.
 """
 macro private(expr)
-    esc(expr)
+    return esc(expr)
 end
 
 """
@@ -236,7 +236,7 @@ end
 that span workitems, or are reused across `@synchronize` statements.
 """
 macro uniform(value)
-    esc(value)
+    return esc(value)
 end
 
 """
@@ -247,7 +247,7 @@ from each thread in the workgroup are visible in from all other threads in the
 workgroup.
 """
 macro synchronize()
-    quote
+    return quote
         $__synchronize()
     end
 end
@@ -264,7 +264,7 @@ workgroup. `cond` is not allowed to have any visible sideffects.
   - `CPU`: This synchronization will always occur.
 """
 macro synchronize(cond)
-    quote
+    return quote
         $(esc(cond)) && $__synchronize()
     end
 end
@@ -289,7 +289,7 @@ end
 ```
 """
 macro context()
-    esc(:(__ctx__))
+    return esc(:(__ctx__))
 end
 
 """
@@ -329,7 +329,7 @@ macro print(items...)
         end
     end
 
-    quote
+    return quote
         $__print($(map(esc, args)...))
     end
 end
@@ -385,7 +385,7 @@ macro index(locale, args...)
     end
 
     index_function = Symbol(:__index_, locale, :_, indexkind)
-    Expr(:call, GlobalRef(KernelAbstractions, index_function), esc(:__ctx__), map(esc, args)...)
+    return Expr(:call, GlobalRef(KernelAbstractions, index_function), esc(:__ctx__), map(esc, args)...)
 end
 
 ###
@@ -591,7 +591,7 @@ struct Kernel{Backend, WorkgroupSize <: _Size, NDRange <: _Size, Fun}
 end
 
 function Base.similar(kernel::Kernel{D, WS, ND}, f::F) where {D, WS, ND, F}
-    Kernel{D, WS, ND, F}(kernel.backend, f)
+    return Kernel{D, WS, ND, F}(kernel.backend, f)
 end
 
 workgroupsize(::Kernel{D, WorkgroupSize}) where {D, WorkgroupSize} = WorkgroupSize
@@ -701,7 +701,7 @@ end
         push!(args, item)
     end
 
-    quote
+    return quote
         print($(args...))
     end
 end
diff --git a/src/cpu.jl b/src/cpu.jl
index bae45a3a..c4d7ab8b 100644
--- a/src/cpu.jl
+++ b/src/cpu.jl
@@ -43,7 +43,7 @@ function (obj::Kernel{CPU})(args...; ndrange = nothing, workgroupsize = nothing)
         return nothing
     end
 
-    __run(obj, ndrange, iterspace, args, dynamic, obj.backend.static)
+    return __run(obj, ndrange, iterspace, args, dynamic, obj.backend.static)
 end
 
 const CPU_GRAINSIZE = 1024 # Vectorization, 4x unrolling, minimal grain size
@@ -162,7 +162,7 @@ end
 
 @inline function __index_Global_Linear(ctx, idx::CartesianIndex)
     I = @inbounds expand(__iterspace(ctx), __groupindex(ctx), idx)
-    @inbounds LinearIndices(__ndrange(ctx))[I]
+    return @inbounds LinearIndices(__ndrange(ctx))[I]
 end
 
 @inline function __index_Local_Cartesian(_, idx::CartesianIndex)
@@ -170,7 +170,7 @@ end
 end
 
 @inline function __index_Group_Cartesian(ctx, ::CartesianIndex)
-    __groupindex(ctx)
+    return __groupindex(ctx)
 end
 
 @inline function __index_Global_Cartesian(ctx, idx::CartesianIndex)
@@ -191,7 +191,7 @@ end
 # CPU implementation of shared memory
 ###
 @inline function SharedMemory(::Type{T}, ::Val{Dims}, ::Val) where {T, Dims}
-    MArray{__size(Dims), T}(undef)
+    return MArray{__size(Dims), T}(undef)
 end
 
 ###
@@ -212,7 +212,7 @@ end
 # https://github.com/JuliaLang/julia/issues/39308
 @inline function aview(A, I::Vararg{Any, N}) where {N}
     J = Base.to_indices(A, I)
-    Base.unsafe_view(Base._maybe_reshape_parent(A, Base.index_ndims(J...)), J...)
+    return Base.unsafe_view(Base._maybe_reshape_parent(A, Base.index_ndims(J...)), J...)
 end
 
 @inline function Base.getindex(A::ScratchArray{N}, idx) where {N}
diff --git a/src/macros.jl b/src/macros.jl
index a511758d..02b93ed7 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -6,7 +6,7 @@ function find_return(stmt)
         result |= @capture(expr, return x_)
         expr
     end
-    result
+    return result
 end
 
 # XXX: Proper errors
@@ -103,6 +103,7 @@ function transform_gpu!(def, constargs, force_inbounds)
         Expr(:block, let_constargs...),
         body,
     )
+    return
 end
 
 # The hard case, transform the function for CPU execution
@@ -137,6 +138,7 @@ function transform_cpu!(def, constargs, force_inbounds)
         Expr(:block, let_constargs...),
         Expr(:block, new_stmts...),
     )
+    return
 end
 
 struct WorkgroupLoop
@@ -150,7 +152,7 @@ end
 is_sync(expr) = @capture(expr, @synchronize() | @synchronize(a_))
 
 function is_scope_construct(expr::Expr)
-    expr.head === :block # ||
+    return expr.head === :block # ||
     # expr.head === :let
 end
 
@@ -160,7 +162,7 @@ function find_sync(stmt)
         result |= is_sync(expr)
         expr
     end
-    result
+    return result
 end
 
 # TODO proper handling of LineInfo
diff --git a/src/nditeration.jl b/src/nditeration.jl
index ab3fd4ec..33c5c072 100644
--- a/src/nditeration.jl
+++ b/src/nditeration.jl
@@ -13,7 +13,7 @@ abstract type _Size end
 struct DynamicSize <: _Size end
 struct StaticSize{S} <: _Size
     function StaticSize{S}() where {S}
-        new{S::Tuple{Vararg{Int}}}()
+        return new{S::Tuple{Vararg{Int}}}()
     end
 end
 
@@ -51,11 +51,11 @@ struct NDRange{N, StaticBlocks, StaticWorkitems, DynamicBlock, DynamicWorkitems}
     workitems::DynamicWorkitems
 
     function NDRange{N, B, W}() where {N, B, W}
-        new{N, B, W, Nothing, Nothing}(nothing, nothing)
+        return new{N, B, W, Nothing, Nothing}(nothing, nothing)
     end
 
     function NDRange{N, B, W}(blocks, workitems) where {N, B, W}
-        new{N, B, W, typeof(blocks), typeof(workitems)}(blocks, workitems)
+        return new{N, B, W, typeof(blocks), typeof(workitems)}(blocks, workitems)
     end
 end
 
@@ -77,19 +77,19 @@ Base.length(range::NDRange) = length(blocks(range))
         gidx = groupidx.I[I]
         (gidx - 1) * stride + idx.I[I]
     end
-    CartesianIndex(nI)
+    return CartesianIndex(nI)
 end
 
 Base.@propagate_inbounds function expand(ndrange::NDRange, groupidx::Integer, idx::Integer)
-    expand(ndrange, blocks(ndrange)[groupidx], workitems(ndrange)[idx])
+    return expand(ndrange, blocks(ndrange)[groupidx], workitems(ndrange)[idx])
 end
 
 Base.@propagate_inbounds function expand(ndrange::NDRange{N}, groupidx::CartesianIndex{N}, idx::Integer) where {N}
-    expand(ndrange, groupidx, workitems(ndrange)[idx])
+    return expand(ndrange, groupidx, workitems(ndrange)[idx])
 end
 
 Base.@propagate_inbounds function expand(ndrange::NDRange{N}, groupidx::Integer, idx::CartesianIndex{N}) where {N}
-    expand(ndrange, blocks(ndrange)[groupidx], idx)
+    return expand(ndrange, blocks(ndrange)[groupidx], idx)
 end
 
 """
diff --git a/src/reflection.jl b/src/reflection.jl
index da3ba1fb..53142cc1 100644
--- a/src/reflection.jl
+++ b/src/reflection.jl
@@ -34,7 +34,7 @@ end
 
 
 function ka_code_llvm(kernel, argtypes; ndrange = nothing, workgroupsize = nothing, kwargs...)
-    ka_code_llvm(stdout, kernel, argtypes; ndrange = ndrange, workgroupsize = nothing, kwargs...)
+    return ka_code_llvm(stdout, kernel, argtypes; ndrange = ndrange, workgroupsize = workgroupsize, kwargs...) # forward the caller's workgroupsize to the IO method
 end
 
 function ka_code_llvm(io::IO, kernel, argtypes; ndrange = nothing, workgroupsize = nothing, kwargs...)
@@ -119,7 +119,7 @@ macro ka_code_typed(ex0...)
 
     thecall = InteractiveUtils.gen_call_with_extracted_types_and_kwargs(__module__, :ka_code_typed, ex)
 
-    quote
+    return quote
         local $(esc(args)) = $(old_args)
         # e.g. translate CuArray to CuBackendArray
         $(esc(args)) = map(x -> argconvert($kern, x), $(esc(args)))
@@ -152,7 +152,7 @@ macro ka_code_llvm(ex0...)
 
     thecall = InteractiveUtils.gen_call_with_extracted_types_and_kwargs(__module__, :ka_code_llvm, ex)
 
-    quote
+    return quote
         local $(esc(args)) = $(old_args)
 
         if isa($kern, Kernel{G} where {G <: GPU})
diff --git a/test/compiler.jl b/test/compiler.jl
index cf86386e..75d17ec2 100644
--- a/test/compiler.jl
+++ b/test/compiler.jl
@@ -39,7 +39,7 @@ end
 
 function compiler_testsuite(backend, ArrayT)
     kernel = index(CPU(), DynamicSize(), DynamicSize())
-    iterspace = NDRange{1, StaticSize{(128,)}, StaticSize{(8,)}}();
+    iterspace = NDRange{1, StaticSize{(128,)}, StaticSize{(8,)}}()
     ctx = KernelAbstractions.mkcontext(kernel, 1, nothing, iterspace, Val(KernelAbstractions.NoDynamicCheck()))
     @test KernelAbstractions.__index_Global_NTuple(ctx, CartesianIndex(1)) == (1,)
 
@@ -74,4 +74,5 @@ function compiler_testsuite(backend, ArrayT)
         # test that there is no invoke of overdub
         @test !any(check_for_overdub, CI.code)
     end
+    return
 end
diff --git a/test/convert.jl b/test/convert.jl
index a87f4801..7b6c2402 100644
--- a/test/convert.jl
+++ b/test/convert.jl
@@ -63,4 +63,5 @@ function convert_testsuite(backend, ArrayT)
             @test d_B[:, i + 20] == round.(d_A)
         end
     end
+    return
 end
diff --git a/test/copyto.jl b/test/copyto.jl
index 9cdf8cfe..53d698be 100644
--- a/test/copyto.jl
+++ b/test/copyto.jl
@@ -17,4 +17,5 @@ function copyto_testsuite(Backend, ArrayT)
 
     @test isapprox(a, Array(A))
     @test isapprox(a, Array(B))
+    return
 end
diff --git a/test/examples.jl b/test/examples.jl
index a3cb04d1..02374db8 100644
--- a/test/examples.jl
+++ b/test/examples.jl
@@ -6,7 +6,7 @@ function find_sources(path::String, sources = String[])
     elseif endswith(path, ".jl")
         push!(sources, path)
     end
-    sources
+    return sources
 end
 
 function examples_testsuite(backend_str)
@@ -27,4 +27,5 @@ function examples_testsuite(backend_str)
         end
 
     end
+    return
 end
diff --git a/test/extensions/enzyme.jl b/test/extensions/enzyme.jl
index 01403776..5d1637f7 100644
--- a/test/extensions/enzyme.jl
+++ b/test/extensions/enzyme.jl
@@ -11,6 +11,7 @@ function square_caller(A, backend)
     kernel = square!(backend)
     kernel(A, ndrange = size(A))
     KernelAbstractions.synchronize(backend)
+    return
 end
 
 
@@ -23,6 +24,7 @@ function mul_caller(A, B, backend)
     kernel = mul!(backend)
     kernel(A, B, ndrange = size(A))
     KernelAbstractions.synchronize(backend)
+    return
 end
 
 function enzyme_testsuite(backend, ArrayT, supports_reverse = true)
@@ -58,4 +60,5 @@ function enzyme_testsuite(backend, ArrayT, supports_reverse = true)
         @test all(dA .≈ 2:2:128)
 
     end
+    return
 end
diff --git a/test/localmem.jl b/test/localmem.jl
index b03bfa74..a31235d0 100644
--- a/test/localmem.jl
+++ b/test/localmem.jl
@@ -8,7 +8,7 @@ using Test
     end
     I = @index(Global, Linear)
     i = @index(Local, Linear)
-    lmem = @localmem Int (N,) # Ok iff groupsize is static 
+    lmem = @localmem Int (N,) # Ok iff groupsize is static
     @inbounds begin
         lmem[i] = i
         @synchronize
@@ -23,7 +23,7 @@ end
     end
     I = @index(Global, Linear)
     i = @index(Local, Linear)
-    lmem = @localmem Int (N,) # Ok iff groupsize is static 
+    lmem = @localmem Int (N,) # Ok iff groupsize is static
     @inbounds begin
         lmem[i] = i + 3
         for j in 1:2
@@ -47,4 +47,5 @@ function localmem_testsuite(backend, ArrayT)
             @test all(B[49:64] .== 16:-1:1)
         end
     end
+    return
 end
diff --git a/test/nditeration.jl b/test/nditeration.jl
index e00ac3ce..95e036cf 100644
--- a/test/nditeration.jl
+++ b/test/nditeration.jl
@@ -28,7 +28,7 @@ function nditeration_testsuite()
 
     function check(idx, offset, offset_x, offset_y, Dim_x, Dim_y)
         N = Dim_x * Dim_y
-        all(p -> p[1] == p[2], zip(idx[(offset * N .+ 1):N], CartesianIndices(((offset_x * Dim_x .+ 1):Dim_x, (offset_y * Dim_y .+ 1):Dim_y))))
+        return all(p -> p[1] == p[2], zip(idx[(offset * N .+ 1):N], CartesianIndices(((offset_x * Dim_x .+ 1):Dim_x, (offset_y * Dim_y .+ 1):Dim_y))))
     end
 
     @testset "linear_iteration" begin
@@ -85,4 +85,5 @@ function nditeration_testsuite()
             end
         end
     end
+    return
 end
diff --git a/test/print_test.jl b/test/print_test.jl
index 79b52730..95648150 100644
--- a/test/print_test.jl
+++ b/test/print_test.jl
@@ -23,4 +23,5 @@ function printing_testsuite(backend)
         end
         @test true
     end
+    return
 end
diff --git a/test/private.jl b/test/private.jl
index 02c25aea..b98dd45a 100644
--- a/test/private.jl
+++ b/test/private.jl
@@ -106,4 +106,5 @@ function private_testsuite(backend, ArrayT)
             end
         end
     end
+    return
 end
diff --git a/test/reflection.jl b/test/reflection.jl
index e5ae39cf..6ce46b2b 100644
--- a/test/reflection.jl
+++ b/test/reflection.jl
@@ -29,6 +29,7 @@ function test_typed_kernel_dynamic(backend, backend_str, ArrayT)
         @test isa(res, Pair{Core.CodeInfo, DataType})
     end
     @test isa(res[1].code, Array{Any, 1})
+    return
 end
 
 function test_typed_kernel_dynamic_no_info(backend, backend_str, ArrayT)
@@ -43,6 +44,7 @@ function test_typed_kernel_dynamic_no_info(backend, backend_str, ArrayT)
         @test isa(res, Pair{Core.CodeInfo, DataType})
     end
     @test isa(res[1].code, Array{Any, 1})
+    return
 end
 
 function test_typed_kernel_static(backend, backend_str, ArrayT)
@@ -59,6 +61,7 @@ function test_typed_kernel_static(backend, backend_str, ArrayT)
         @test isa(res, Pair{Core.CodeInfo, DataType})
     end
     @test isa(res[1].code, Array{Any, 1})
+    return
 end
 
 function test_typed_kernel_no_optimize(backend, backend_str, ArrayT)
@@ -72,6 +75,7 @@ function test_typed_kernel_no_optimize(backend, backend_str, ArrayT)
     res_opt = @ka_code_typed kernel(A, ndrange = size(A))
     # FIXME: Need a better test
     # @test size(res[1].code) < size(res_opt[1].code)
+    return
 end
 
 function test_expr_kernel(backend, backend_str, ArrayT)
@@ -89,6 +93,7 @@ function test_expr_kernel(backend, backend_str, ArrayT)
         @test isa(res, Pair{Core.CodeInfo, DataType})
     end
     @test isa(res[1].code, Array{Any, 1})
+    return
 end
 
 function reflection_testsuite(backend, backend_str, ArrayT)
@@ -97,4 +102,5 @@ function reflection_testsuite(backend, backend_str, ArrayT)
     test_typed_kernel_static(backend, backend_str, ArrayT)
     test_typed_kernel_no_optimize(backend, backend_str, ArrayT)
     test_expr_kernel(backend, backend_str, ArrayT)
+    return
 end
diff --git a/test/specialfunctions.jl b/test/specialfunctions.jl
index 26198646..b41d0876 100644
--- a/test/specialfunctions.jl
+++ b/test/specialfunctions.jl
@@ -53,4 +53,5 @@ function specialfunctions_testsuite(Backend)
         synchronize(backend)
         @test Array(cy) ≈ SpecialFunctions.erfc.(x)
     end
+    return
 end
diff --git a/test/test.jl b/test/test.jl
index 337af10d..d86d9803 100644
--- a/test/test.jl
+++ b/test/test.jl
@@ -154,7 +154,7 @@ function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; sk
     @conditional_testset "Const" skip_tests begin
         let kernel = constarg(Backend(), 8, (1024,))
             # this is poking at internals
-            iterspace = NDRange{1, StaticSize{(128,)}, StaticSize{(8,)}}();
+            iterspace = NDRange{1, StaticSize{(128,)}, StaticSize{(8,)}}()
             ctx = if Backend == CPU
                 KernelAbstractions.mkcontext(kernel, 1, nothing, iterspace, Val(NoDynamicCheck()))
             else
@@ -266,6 +266,7 @@ function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; sk
     function f(KernelAbstractions.@context, a)
         I = @index(Global, Linear)
         a[I] = 1
+        return
     end
     @kernel cpu = false function context_kernel(a)
         f(KernelAbstractions.@context, a)
@@ -324,4 +325,5 @@ function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; sk
         end
     end
 
+    return
 end
diff --git a/test/testsuite.jl b/test/testsuite.jl
index a92cf73a..c4696cd1 100644
--- a/test/testsuite.jl
+++ b/test/testsuite.jl
@@ -11,7 +11,7 @@ const Pkg = Base.require(
 )
 
 macro conditional_testset(name, skip_tests, expr)
-    esc(
+    return esc(
         quote
             @testset $name begin
                 if $name ∉ $skip_tests
@@ -86,6 +86,8 @@ function testsuite(backend, backend_str, backend_mod, AT, DAT; skip_tests = Set{
     @conditional_testset "Examples" skip_tests begin
         examples_testsuite(backend_str)
     end
+
+    return
 end
 
 end
diff --git a/test/unroll.jl b/test/unroll.jl
index 7bbbab4d..c9a64466 100644
--- a/test/unroll.jl
+++ b/test/unroll.jl
@@ -44,4 +44,5 @@ function unroll_testsuite(backend, ArrayT)
     kernel2! = kernel_unroll2!(backend(), 1, 1)
     kernel2!(a)
     synchronize(backend())
+    return
 end