diff --git a/examples/matmul.jl b/examples/matmul.jl index f28f9100..11d5c152 100644 --- a/examples/matmul.jl +++ b/examples/matmul.jl @@ -2,34 +2,34 @@ using KernelAbstractions, Test, Random include(joinpath(dirname(pathof(KernelAbstractions)), "../examples/utils.jl")) # Load backend # Simple kernel for matrix multiplication -@kernel function matmul_kernel!(a, b, c) +@kernel function matmul_kernel!(output, a, b) i, j = @index(Global, NTuple) # creating a temporary sum variable for matrix multiplication - tmp_sum = zero(eltype(c)) + tmp_sum = zero(eltype(output)) for k = 1:size(a)[2] tmp_sum += a[i,k] * b[k, j] end - c[i,j] = tmp_sum + output[i,j] = tmp_sum end # Creating a wrapper kernel for launching with error checks -function matmul!(a, b, c) +function matmul!(output, a, b) if size(a)[2] != size(b)[1] println("Matrix size mismatch!") return nothing end backend = KernelAbstractions.get_backend(a) kernel! = matmul_kernel!(backend) - kernel!(a, b, c, ndrange=size(c)) + kernel!(output, a, b, ndrange=size(output)) end a = rand!(allocate(backend, Float32, 256, 123)) b = rand!(allocate(backend, Float32, 123, 45)) -c = KernelAbstractions.zeros(backend, Float32, 256, 45) +output = KernelAbstractions.zeros(backend, Float32, 256, 45) -matmul!(a,b,c) +matmul!(output, a,b) KernelAbstractions.synchronize(backend) -@test isapprox(c, a*b) +@test isapprox(output, a*b) diff --git a/examples/naive_transpose.jl b/examples/naive_transpose.jl index de346be5..bca184fa 100644 --- a/examples/naive_transpose.jl +++ b/examples/naive_transpose.jl @@ -3,7 +3,7 @@ include(joinpath(dirname(pathof(KernelAbstractions)), "../examples/utils.jl")) # @kernel function naive_transpose_kernel!(a, b) i, j = @index(Global, NTuple) - @inbounds b[i, j] = a[j, i] + @inbounds a[i, j] = b[j, i] end # create wrapper function to check inputs @@ -24,8 +24,8 @@ end res = 1024 # creating initial arrays -a = rand!(allocate(backend, Float32, res, res)) -b = KernelAbstractions.zeros(backend, Float32, res, res) +b = rand!(allocate(backend, Float32, res, res)) +a = KernelAbstractions.zeros(backend, Float32, res, res) naive_transpose!(a,b) KernelAbstractions.synchronize(backend)