From f53e93ffd0839e9b3f33a19f3350a8460e182db0 Mon Sep 17 00:00:00 2001 From: roflmaostc Date: Fri, 1 Dec 2023 15:28:50 +0100 Subject: [PATCH 01/11] Add simple example with Atomix.jl --- docs/make.jl | 1 + docs/src/assets/atomix_broken.png | Bin 0 -> 653 bytes docs/src/assets/atomix_correct.png | Bin 0 -> 300 bytes docs/src/examples/atomix.md | 68 +++++++++++++++++++++++++++++ 4 files changed, 69 insertions(+) create mode 100644 docs/src/assets/atomix_broken.png create mode 100644 docs/src/assets/atomix_correct.png create mode 100644 docs/src/examples/atomix.md diff --git a/docs/make.jl b/docs/make.jl index 5c6af135..00517c46 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -27,6 +27,7 @@ makedocs(; "examples/performance.md", "examples/matmul.md", "examples/numa_aware.md", + "examples/atomix.md", ], # Examples "API" => "api.md", "Extras" => [ diff --git a/docs/src/assets/atomix_broken.png b/docs/src/assets/atomix_broken.png new file mode 100644 index 0000000000000000000000000000000000000000..963b8dbb72f22b839fd02a84acaebd3433ec0aa0 GIT binary patch literal 653 zcmeAS@N?(olHy`uVBq!ia0vp^DIm-NBp5NS%G}U;vjb? zhIQv;UIIA^$sR$z3=CCj3=9n|3=F@3LJcn%7)lKo7+xhXFj&oCU=S~uvn$Ysfq{wJ z)5S3)gLUu0&fFyq0t^SDR(@_evrOH%a!H1<{t)5DY! zlc=TF^ZG6YY6#q$QKPi(P~#-InR_PflQBJeR!xLM+~Z~1RI$52gZvL-SaNxoBAg?7Lv5A+|RKcbTWlrD6&3fpl(91Xt zfq%_gJZ1+d#3Q{=gNhP7cRaDluR+X>0>w9>~16|3?|=wVSIL7HrT%}(l4!xF5M0E(i%&kVJ9;T ze|BE65$9?#cUyh3_~q6Bg*}EpeYR(v0|idPHi@@t=a+u*(h!iFVHo9WH*ppymW(9V zvhjpXz7N#zX?(l4Zn-Bgh~}JO>zQHjb9cZ4`J@+x=lja1Zvmzk22WQ%mvv4FO#p`j B4A}qx literal 0 HcmV?d00001 diff --git a/docs/src/assets/atomix_correct.png b/docs/src/assets/atomix_correct.png new file mode 100644 index 0000000000000000000000000000000000000000..9345365a32815f08dbcc3fe7a54905f04268fb4d GIT binary patch literal 300 zcmeAS@N?(olHy`uVBq!ia0vp^DIm-NBp5NS%G}U;vjb? 
zhIQv;UIIA^$sR$z3=CCj3=9n|3=F@3LJcn%7)lKo7+xhXFj&oCU=S~uvn$XBD6z`Z z#W5sa|KKysi$(F61F>3mg_a8m|U_t|niUK32hXa$)fHF5QUh!m} U*gT8r4$##Mp00i_>zopr06aBG7ytkO literal 0 HcmV?d00001 diff --git a/docs/src/examples/atomix.md b/docs/src/examples/atomix.md new file mode 100644 index 00000000..6b0201bf --- /dev/null +++ b/docs/src/examples/atomix.md @@ -0,0 +1,68 @@ +# Atomic operations with Atomix.jl + +In case the different kernels access the same memories, [race conditions](https://en.wikipedia.org/wiki/Race_condition) can occur. +KA is compatible with [Atomix.jl](https://github.com/JuliaConcurrent/Atomix.jl) to avoid this. + +## Race conditions + +The following goes wrong: +```julia +using CUDA, KernelAbstractions, CUDA.CUDAKernels, Atomix +using ImageShow, ImageIO + + +function index_fun(arr; backend=CUDABackend()) + out = similar(arr) + fill!(out, 0) + kernel! = my_kernel!(backend) + kernel!(out, arr, ndrange=(size(arr, 1), size(arr, 2))) + return out +end + +@kernel function my_kernel!(out, arr) + i, j = @index(Global, NTuple) + for k in 1:size(out, 1) + out[k, i] += arr[i, j] + end +end + +img = zeros(Float32, (50, 50)); +img[10:20, 10:20] .= 1; +img[35:45, 35:45] .= 2; + + +out = Array(index_fun(CuArray(img), backend=CUDABackend())); +simshow(out) +``` +In principle, this kernel just the values of the pixels along the first dimension. +However, the different `out[k, i]` are accessed by each of the kernels multiple times, so racing conditions happen. + +The resulting image has artifacts. + +![Resulting Image has artifacts](../assets/atomix_broken.png) + + +## Fix with Atomix.jl +To fix this we need to mark the critical accesses with an `Atomix.@atomic` +```julia +function index_fun_fixed(arr; backend=CUDABackend()) + out = similar(arr) + fill!(out, 0) + kernel! 
= my_kernel_fixed!(backend) + kernel!(out, arr, ndrange=(size(arr, 1), size(arr, 2))) + return out +end + +@kernel function my_kernel_fixed!(out, arr) + i, j = @index(Global, NTuple) + for k in 1:size(out, 1) + Atomix.@atomic out[k, i] += arr[i, j] + end +end + +out_fixed = Array(index_fun_fixed(CuArray(img), backend=CUDABackend())); +simshow(out_fixed) +``` +This image is free of artifacts. + +![Resulting image is correct.](../assets/atomix_correct.png) From 4f5b406c0a2382da34be055fc2275c027a1b65cc Mon Sep 17 00:00:00 2001 From: roflmaostc Date: Fri, 1 Dec 2023 15:31:50 +0100 Subject: [PATCH 02/11] Some text fixes --- docs/src/examples/atomix.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/src/examples/atomix.md b/docs/src/examples/atomix.md index 6b0201bf..387ec389 100644 --- a/docs/src/examples/atomix.md +++ b/docs/src/examples/atomix.md @@ -34,8 +34,9 @@ img[35:45, 35:45] .= 2; out = Array(index_fun(CuArray(img), backend=CUDABackend())); simshow(out) ``` -In principle, this kernel just the values of the pixels along the first dimension. -However, the different `out[k, i]` are accessed by each of the kernels multiple times, so racing conditions happen. +In principle, this kernel just smears the values of the pixels along the first dimension. +However, the different `out[k, i]` are accessed by each of the kernels multiple times, so racing conditions happen that some +kernels access old results or overwrite new results. The resulting image has artifacts.
From 17fd20db165f1111f280f4997b862a64a3ec83e5 Mon Sep 17 00:00:00 2001 From: "Felix Wechsler (he/him)" Date: Mon, 4 Dec 2023 21:45:10 +0100 Subject: [PATCH 03/11] Update docs/src/examples/atomix.md Co-authored-by: Valentin Churavy --- docs/src/examples/atomix.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/examples/atomix.md b/docs/src/examples/atomix.md index 387ec389..e569524e 100644 --- a/docs/src/examples/atomix.md +++ b/docs/src/examples/atomix.md @@ -1,6 +1,6 @@ # Atomic operations with Atomix.jl -In case the different kernels access the same memories, [race conditions](https://en.wikipedia.org/wiki/Race_condition) can occur. +In case the different kernels access the same memory locations, [race conditions](https://en.wikipedia.org/wiki/Race_condition) can occur. KA is compatible with [Atomix.jl](https://github.com/JuliaConcurrent/Atomix.jl) to avoid this. ## Race conditions From 1e162e5cc2050eaf8f2505d05930bc5c0cd277bd Mon Sep 17 00:00:00 2001 From: "Felix Wechsler (he/him)" Date: Mon, 4 Dec 2023 21:45:19 +0100 Subject: [PATCH 04/11] Update docs/src/examples/atomix.md Co-authored-by: Valentin Churavy --- docs/src/examples/atomix.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/src/examples/atomix.md b/docs/src/examples/atomix.md index e569524e..5384da23 100644 --- a/docs/src/examples/atomix.md +++ b/docs/src/examples/atomix.md @@ -5,7 +5,8 @@ KA is compatible with [Atomix.jl](https://github.com/JuliaConcurrent/Atomix.jl) ## Race conditions -The following goes wrong: +The following example demonstrates a common race condition: + ```julia using CUDA, KernelAbstractions, CUDA.CUDAKernels, Atomix using ImageShow, ImageIO From afced7a44d641a30a822359c009720a2aec6a20b Mon Sep 17 00:00:00 2001 From: "Felix Wechsler (he/him)" Date: Mon, 4 Dec 2023 21:45:49 +0100 Subject: [PATCH 05/11] Update docs/src/examples/atomix.md Co-authored-by: Valentin Churavy --- docs/src/examples/atomix.md | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/docs/src/examples/atomix.md b/docs/src/examples/atomix.md index 5384da23..9165bea7 100644 --- a/docs/src/examples/atomix.md +++ b/docs/src/examples/atomix.md @@ -12,7 +12,7 @@ using CUDA, KernelAbstractions, CUDA.CUDAKernels, Atomix using ImageShow, ImageIO -function index_fun(arr; backend=CUDABackend()) +function index_fun(arr; backend=get_backend(arr)) out = similar(arr) fill!(out, 0) kernel! = my_kernel!(backend) From 1011da29ac925e0c4c8571b22e27c42cdc6f283c Mon Sep 17 00:00:00 2001 From: "Felix Wechsler (he/him)" Date: Mon, 4 Dec 2023 21:46:02 +0100 Subject: [PATCH 06/11] Update docs/src/examples/atomix.md Co-authored-by: Valentin Churavy --- docs/src/examples/atomix.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/examples/atomix.md b/docs/src/examples/atomix.md index 9165bea7..80f4355f 100644 --- a/docs/src/examples/atomix.md +++ b/docs/src/examples/atomix.md @@ -62,7 +62,7 @@ end end end -out_fixed = Array(index_fun_fixed(CuArray(img), backend=CUDABackend())); +out_fixed = Array(index_fun_fixed(CuArray(img))); simshow(out_fixed) ``` This image is free of artifacts. From 47bc3bfe93d5d6abc4b1cf57d8a53718f7c44ae2 Mon Sep 17 00:00:00 2001 From: "Felix Wechsler (he/him)" Date: Mon, 4 Dec 2023 21:46:09 +0100 Subject: [PATCH 07/11] Update docs/src/examples/atomix.md Co-authored-by: Valentin Churavy --- docs/src/examples/atomix.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/examples/atomix.md b/docs/src/examples/atomix.md index 80f4355f..12fd4af3 100644 --- a/docs/src/examples/atomix.md +++ b/docs/src/examples/atomix.md @@ -1,7 +1,7 @@ # Atomic operations with Atomix.jl In case the different kernels access the same memory locations, [race conditions](https://en.wikipedia.org/wiki/Race_condition) can occur. -KA is compatible with [Atomix.jl](https://github.com/JuliaConcurrent/Atomix.jl) to avoid this. 
+KernelAbstractions uses [Atomix.jl](https://github.com/JuliaConcurrent/Atomix.jl) to provide access to atomic memory operations. ## Race conditions From b6fba1cd5940425907fa9a83f225fc1b172420c4 Mon Sep 17 00:00:00 2001 From: "Felix Wechsler (he/him)" Date: Mon, 4 Dec 2023 21:46:27 +0100 Subject: [PATCH 08/11] Update docs/src/examples/atomix.md Co-authored-by: Valentin Churavy --- docs/src/examples/atomix.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/examples/atomix.md b/docs/src/examples/atomix.md index 12fd4af3..bd512cac 100644 --- a/docs/src/examples/atomix.md +++ b/docs/src/examples/atomix.md @@ -32,7 +32,7 @@ img[10:20, 10:20] .= 1; img[35:45, 35:45] .= 2; -out = Array(index_fun(CuArray(img), backend=CUDABackend())); +out = Array(index_fun(CuArray(img))); simshow(out) ``` In principle, this kernel just smears the values of the pixels along the first dimension. From e65af6729dd94017eec0200ba6f916c0e67aa4b1 Mon Sep 17 00:00:00 2001 From: "Felix Wechsler (he/him)" Date: Mon, 4 Dec 2023 21:46:32 +0100 Subject: [PATCH 09/11] Update docs/src/examples/atomix.md Co-authored-by: Valentin Churavy --- docs/src/examples/atomix.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/examples/atomix.md b/docs/src/examples/atomix.md index bd512cac..7758248c 100644 --- a/docs/src/examples/atomix.md +++ b/docs/src/examples/atomix.md @@ -8,7 +8,7 @@ KernelAbstractions uses [Atomix.jl](https://github.com/JuliaConcurrent/Atomix.j The following example demonstrates a common race condition: ```julia -using CUDA, KernelAbstractions, CUDA.CUDAKernels, Atomix +using CUDA, KernelAbstractions, Atomix using ImageShow, ImageIO From dffb3b0bf04961230abe47ea65f00ad605fcd10a Mon Sep 17 00:00:00 2001 From: "Felix Wechsler (he/him)" Date: Mon, 4 Dec 2023 21:46:48 +0100 Subject: [PATCH 10/11] Update docs/src/examples/atomix.md Co-authored-by: Valentin Churavy --- docs/src/examples/atomix.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 
deletions(-) diff --git a/docs/src/examples/atomix.md b/docs/src/examples/atomix.md index 7758248c..46e13aec 100644 --- a/docs/src/examples/atomix.md +++ b/docs/src/examples/atomix.md @@ -35,9 +35,10 @@ img[35:45, 35:45] .= 2; out = Array(index_fun(CuArray(img))); simshow(out) ``` -In principle, this kernel just smears the values of the pixels along the first dimension. -However, the different `out[k, i]` are accessed by each of the kernels multiple times, so racing conditions happen that some -kernels access old results or overwrite new results. +In principle, this kernel should just smear the values of the pixels along the first dimension. + +However, the different `out[k, i]` are accessed from multiple work-items and thus memory races can occur. +We need to ensure that the accumulate `+=` occurs atomically. The resulting image has artifacts. From b00f6031c25e5857b805017feec9394a3e4d6131 Mon Sep 17 00:00:00 2001 From: "Felix Wechsler (he/him)" Date: Mon, 4 Dec 2023 21:46:55 +0100 Subject: [PATCH 11/11] Update docs/src/examples/atomix.md Co-authored-by: Valentin Churavy --- docs/src/examples/atomix.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/examples/atomix.md b/docs/src/examples/atomix.md index 46e13aec..5bcd3998 100644 --- a/docs/src/examples/atomix.md +++ b/docs/src/examples/atomix.md @@ -48,7 +48,7 @@ The resulting image has artifacts. ## Fix with Atomix.jl To fix this we need to mark the critical accesses with an `Atomix.@atomic` ```julia -function index_fun_fixed(arr; backend=CUDABackend()) +function index_fun_fixed(arr; backend=get_backend(arr)) out = similar(arr) fill!(out, 0) kernel! = my_kernel_fixed!(backend)