Skip to content

Commit

Permalink
Split unit tests from benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
charleskawczynski committed Oct 4, 2024
1 parent 9136e7f commit ef0bb17
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 9 deletions.
22 changes: 21 additions & 1 deletion .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,35 @@ steps:

- label: "CPU tests"
  key: tests_cpu
  # A step may define `command` only once. Keep the bounds-checked invocation
  # so out-of-bounds accesses fail the unit tests instead of going unnoticed.
  command: "julia --color=yes --check-bounds=yes --project=.buildkite test/runtests.jl"

# Unit-test step for the CUDA code path.
- label: "CUDA tests"
key: tests_cuda
command:
# Report the CUDA version information before running the tests.
- "julia --project=.buildkite -e 'using CUDA; CUDA.versioninfo()'"
# Same entry point as the CPU test step, with bounds checking forced on.
- "julia --color=yes --check-bounds=yes --project=.buildkite test/runtests.jl"
env:
# presumably read by the test suite to select the CUDA backend; confirm in test/runtests.jl
USE_CUDA: "true"
agents:
# presumably requests one GPU from the slurm-backed agent; confirm against the agent config
slurm_gpus: 1

# Benchmarks run in their own group, without `--check-bounds=yes`, so that
# timings are not distorted by forced bounds checking.
- group: "Benchmarks"
  steps:

    - label: "CPU benchmarks"
      key: bm_cpu
      command: "julia --color=yes --project=.buildkite test/runtests.jl"
      env:
        # Must match the name read in test/execution/utils_benchmark.jl
        # (`get(ENV, "PERFORM_BENCHMARK", false) == "true"`); with any other
        # name every benchmark is skipped with a warning.
        PERFORM_BENCHMARK: "true"

    - label: "CUDA benchmarks"
      key: bm_cuda
      command:
        - "julia --project=.buildkite -e 'using CUDA; CUDA.versioninfo()'"
        - "julia --color=yes --project=.buildkite test/runtests.jl"
      env:
        USE_CUDA: "true"
        # Same variable name as the CPU benchmark step; see the note there.
        PERFORM_BENCHMARK: "true"
      agents:
        slurm_gpus: 1

15 changes: 12 additions & 3 deletions ext/MultiBroadcastFusionCUDAExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,29 @@ MBF.device(x::CUDA.CuArray) = MBF.MBF_CUDA()
"""
    fused_copyto!(fmb::MBF.FusedMultiBroadcast, ::MBF.MBF_CUDA)

Launch a single CUDA kernel, `fused_copyto_kernel!`, over all pairs held in `fmb`.

The first element of each pair is treated as a destination. All destinations
must share the axes of the first one; those axes are turned into a
`CartesianIndices` object that is passed to the kernel together with `fmb`.

Returns `nothing`. Throws an `ErrorException` when the destinations have
unequal axes.
"""
function fused_copyto!(fmb::MBF.FusedMultiBroadcast, ::MBF.MBF_CUDA)
    (; pairs) = fmb
    dest = first(pairs).first
    destinations = map(p -> p.first, pairs)
    # Validate before touching the device: the kernel indexes every
    # destination with the index set derived from `dest`.
    all(a -> axes(a) == axes(dest), destinations) ||
        error("Cannot fuse broadcast expressions with unequal broadcast axes")
    nitems = length(parent(dest))
    # Nothing to compute; this also avoids `cld(0, 0)` below, which throws.
    nitems == 0 && return nothing
    max_threads = 256 # can be higher if conditions permit
    nthreads = min(max_threads, nitems)
    nblocks = cld(nitems, nthreads)
    CI = CartesianIndices(axes(dest))
    # Launch exactly once, with the two-argument kernel signature.
    CUDA.@cuda threads = (nthreads) blocks = (nblocks) fused_copyto_kernel!(
        fmb,
        CI,
    )
    return nothing
end
import Base.Broadcast
"""
    fused_copyto_kernel!(fmb::MBF.FusedMultiBroadcast, CI)

CUDA kernel body: compute this thread's global linear index from the thread
and block indices and, if it lies within `length(dest)`, call
`MBF.rcopyto_at!(pairs, CI[idx])`, where `dest` is the first element of the
first pair in `fmb`.

`CI` is indexed with the linear thread index to obtain the index handed to
`MBF.rcopyto_at!`. Always returns `nothing`.
"""
function fused_copyto_kernel!(fmb::MBF.FusedMultiBroadcast, CI)
    (; pairs) = fmb
    dest = first(pairs).first
    nitems = length(dest)
    idx = CUDA.threadIdx().x + (CUDA.blockIdx().x - 1) * CUDA.blockDim().x
    # The launch may start more threads than there are items (block count is
    # rounded up), so surplus threads must do nothing.
    if idx <= nitems
        MBF.rcopyto_at!(pairs, CI[idx])
    end
    return nothing
end
Expand Down
10 changes: 10 additions & 0 deletions test/execution/utils_benchmark.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ trunc_time(s::String) = count(',', s) > 1 ? join(split(s, ",")[1:2], ",") : s

import PrettyTables
function tabulate_benchmark(bm)
perform_benchmark = get(ENV, "PERFORM_BENCHMARK", false) == "true"
if !perform_benchmark
@warn "Benchmark skipped, set `ENV[\"PERFORM_BENCHMARK\"] = true` to run benchmarks"
return nothing
end
funcs = map(x -> strip(x.caller), bm.data)
timings = map(x -> time_and_units_str(x.kernel_time_s), bm.data)
n_reads_writes = map(x -> x.n_reads_writes, bm.data)
Expand Down Expand Up @@ -159,6 +164,11 @@ function benchmark_trial!(use_cuda, f!, X, Y)
end

function push_benchmark!(bm, use_cuda, f!, X, Y; n_reads_writes, problem_size)
perform_benchmark = get(ENV, "PERFORM_BENCHMARK", false) == "true"
if !perform_benchmark
@warn "Benchmark skipped, set `ENV[\"PERFORM_BENCHMARK\"] = true` to run benchmarks"
return nothing
end
f!(X, Y) # compile first
trial = benchmark_trial!(use_cuda, f!, X, Y)
e = minimum(trial.times) * 1e-9 # to seconds
Expand Down
7 changes: 2 additions & 5 deletions test/execution/utils_test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,8 @@ rcompare(x::T, y::T) where {T <: NamedTuple} = _rcompare(true, x, y)
rcompare(x, y) = false

"""
    test_compare(x, y)

Assert, via `@test`, that `rcompare(x, y)` holds. When it does not, first emit
the output of `@rprint_diff(x, y)` so the failing test is accompanied by a
diff of the two values.
"""
function test_compare(x, y)
    # Print the diff only on mismatch. The `@test` below always runs, so a
    # mismatch is recorded as a test failure rather than only being printed.
    if !rcompare(x, y)
        @rprint_diff(x, y)
    end
    @test rcompare(x, y)
end

function test_kernel!(; fused!, unfused!, X, Y)
Expand Down

0 comments on commit ef0bb17

Please sign in to comment.