From c3df8b708c8a714265c4fe6210b7eb52990a993b Mon Sep 17 00:00:00 2001
From: Bernat Font <bernatfontgarcia@gmail.com>
Date: Wed, 31 Jul 2024 18:40:14 +0200
Subject: [PATCH] Removed benchmarks in favor of new
 https://github.com/WaterLily-jl/WaterLily-Benchmarks repo.

---
 benchmark/Project.toml |  15 ---
 benchmark/README.md    |  50 ----------
 benchmark/benchmark.jl |  87 -----------------
 benchmark/benchmark.sh | 211 -----------------------------------------
 benchmark/compare.jl   | 115 ----------------------
 benchmark/util.jl      | 202 ---------------------------------------
 6 files changed, 680 deletions(-)
 delete mode 100644 benchmark/Project.toml
 delete mode 100644 benchmark/README.md
 delete mode 100644 benchmark/benchmark.jl
 delete mode 100755 benchmark/benchmark.sh
 delete mode 100644 benchmark/compare.jl
 delete mode 100644 benchmark/util.jl

diff --git a/benchmark/Project.toml b/benchmark/Project.toml
deleted file mode 100644
index eaedcf8..0000000
--- a/benchmark/Project.toml
+++ /dev/null
@@ -1,15 +0,0 @@
-[deps]
-AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
-BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
-CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
-ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
-GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
-Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
-KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
-LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
-Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
-PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
-StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
-StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
-WaterLily = "ed894a53-35f9-47f1-b17f-85db9237eebd"
diff --git a/benchmark/README.md b/benchmark/README.md
deleted file mode 100644
index 98dd273..0000000
--- a/benchmark/README.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# Automatic benchmark generation suite
-
-Suite to generate benchmarks across different WaterLily versions, Julia versions (using [**juliaup**](https://github.com/JuliaLang/juliaup)), backends, cases, and cases sizes using the [benchmark.sh](./benchmark.sh) script.
-
-## TL;DR
-Usage example
-```sh
-sh benchmark.sh -v "1.9.4 1.10.0" -t "1 4" -b "Array CuArray" -c "tgv jelly" -p "6,7 5,6" -s "100 100" -ft "Float32 Float64"
-julia --project compare.jl --datadir="data" --plotdir="plots" --patterns=["tgv","jelly"] --sort=1
-```
-runs both the TGV and jelly benchmarks (`-c`) at the current WaterLily state in 2 different Julia versions (1.9.4 and 1.10.0-rc1, noting that these need to be available in juliaup), and 3 different backends (CPUx01, CPUx04, CUDA). The cases size `-p`, number of time steps `-s`, and float type `-ft` are bash (ordered) arrays which need to be equally sized to `-c` and specify each benchmark case (respectively).
-The default benchmarks launch (`sh benchmark.sh`) is equivalent to:
-```sh
-sh benchmark.sh -w "" -ju true -v release -t "1 4" -b "Array CuArray" -c "tgv jelly" -p "6,7 5,6" -s "100 100" -ft "Float32 Float32"
-```
-Note that `-w` or `--waterlily` can be used to pass different WaterLily versions, but this feature will only work for WaterLily>1.2 releases (since checking out to an old WaterLily version will also change the benchmark suite). This will be fixed by moving the benchmark suite to its own repository.
-
-Benchmarks are then post-processed using the `compare.jl` script. Plots can be generated by passing the `--plotdir` argument. Note that `--patterns` can be passed to post-process only certain benchmarks. Alternatively, benchmark files can also be passed directly as arguments with
-```sh
-julia --project compare.jl --plotdir="plots" $(find data/ \( -name "tgv*json" -o -name "jelly*json" \) -printf "%T@ %Tc %p\n" | sort -n | awk '{print $7}')
-```
-
-## Usage information
-The accepted command line arguments are (parenthesis for short version):
- - Backend arguments: `--waterlily(-w)`, `--juliaup(-ju)`, `--versions(-v)`, `--backends(-b)`, `--threads(-t)`. Respectively: List of WaterLily git hashes to test, usage of juliaup or not (`true` by default, but use `false` to run with default Julia in system), Julia version, backend types, number of threads (when `--backends` contains `Array`). The latter 3 arguments accept a list of different parameters, for example:
-    ```sh
-    -w "fae590d e22ad41" -v "1.8.5 1.9.4" -b "Array CuArray" -t "1 6"
-    ```
-    would generate benchmark for all these combinations of parameters.
- - Case arguments: `--cases(-c)`, `--log2p(-p)`, `--max_steps(-s)`, `--ftype(-ft)`. The `--cases` argument specifies which cases to benchmark, and it can be again a list of different cases. The name of the cases needs to be defined in [benchmark.jl](./benchmark.jl), for example `tgv` or `jelly`. The current available cases are `"tgv sphere cylinder donut jelly"`. Hence, to add a new case first define the function that returns a `Simulation` in [benchmark.jl](./benchmark.jl), and then it can be called using the `--cases(-c)` list argument. Case size, number of time steps, and float data type are then defined for each case (`-p`, `-s`, `-ft`, respectively). All case arguments must have an equal length since each element of the array defines the case in different aspects.
-
-The following command
-```sh
-sh benchmark.sh -v release -t "1 3 6" -b "Array CuArray" -c "tgv sphere" -p "6,7,8 5,6" -s "10 100" -ft "Float64 Float32"
-```
-would allow running benchmarks with 4 backends: CPUx01 (serial), CPUx03, CPUx06, GPU. Additionally, two benchmarks would be tested, `tgv` and `sphere`, with different sizes, number of time steps, and float type, each. This would result into 1 Julia version x (3 Array + 1 CuArray) backends x (3 TGV sizes + 2 jelly sizes) = 20 benchmarks.
-
-Benchmarks are saved in JSON format with the following nomenclature: `casename_sizes_maxsteps_ftype_backend_waterlilyHEADhash_juliaversion.json`. Benchmarks can be finally compared using [`compare.jl`](./compare.jl) as follows
-```sh
-julia --project compare.jl benchmark_1.json benchmark_2.json benchmark_3.json ...
-```
-or by using pattern syntax
-```sh
-julia --project compare.jl --datadir="data" --patterns=["tgv*CPU"]
-```
-for which only TGV benchmarks on a CPU backend found in the `"data"` directory would be processed. The following syntax would be produce equivalent results:
-```sh
-julia --project compare.jl $(find data -name "tgv*CPU.json" -printf "%T@ %Tc %p\n" | sort -n | awk '{print $7}') --sort=1
-```
-by taking the `tgv` JSON files, sort them by creation time, and pass them as arguments to the `compare.jl` program. Finally, note that the first benchmark passed as argument is taken as reference to compute speed-ups of other benchmarks: `speedup_x = time(benchmark_1) / time(benchmark_x)`. The `--sort=<1 to 8>` argument can also be used when running the comparison. It will sort the benchmark table rows by the values corresponding to the column index passed as argument. `--sort=1` corresponds to sorting by backend. The baseline row is highlighted in blue, and the fastest run in a table is highlighted in green.
diff --git a/benchmark/benchmark.jl b/benchmark/benchmark.jl
deleted file mode 100644
index 0376dfb..0000000
--- a/benchmark/benchmark.jl
+++ /dev/null
@@ -1,87 +0,0 @@
-using WaterLily
-using BenchmarkTools
-using KernelAbstractions: synchronize, get_backend
-using StaticArrays
-include("util.jl")
-
-# Define simulation benchmarks
-function tgv(p, backend; Re=1600, T=Float32)
-    L = 2^p; U = 1; κ=π/L; ν = 1/(κ*Re)
-    function uλ(i,xyz)
-        x,y,z = @. xyz/L*π
-        i==1 && return -U*sin(x)*cos(y)*cos(z)
-        i==2 && return  U*cos(x)*sin(y)*cos(z)
-        return 0.
-    end
-    Simulation((L, L, L), (0, 0, 0), 1/κ; U=U, uλ=uλ, ν=ν, T=T, mem=backend)
-end
-
-function sphere(p, backend; Re=3700, U=1, T=Float32)
-    D = 2^p; ν = U*D/Re
-    L = (16D, 6D, 6D)
-    center = @SVector T[1.5D, 3D, 3D]; radius = T(D/2)
-    body = AutoBody((x,t) -> √sum(abs2, x .- center) - radius)
-    Simulation(L, (U, 0, 0), D; U=U, ν=ν, body=body, T=T, mem=backend, exitBC=true)
-end
-
-function cylinder(p, backend; Re=1e3, U=1, T=Float32)
-    L = 2^p; R = L/2; ν = U*L/Re
-    center = SA[1.5L, 3L, 0]
-    function sdf(xyz, t)
-        x, y, z = xyz - center
-        √sum(abs2, SA[x, y, 0]) - R
-    end
-    function map(xyz, t)
-        xyz - SA[0, R*sin(t*U/L), 0]
-    end
-    Simulation((12L, 6L, 2L), (U, 0, 0), L; U=U, ν=ν, body=AutoBody(sdf, map), T=T, mem=backend, exitBC=true, perdir=(3,))
-end
-
-function donut(p, backend; Re=1e3, U=1, T=Float32)
-    L = 2^p
-    center, R, r = SA[L/2, L/2, L/2], L/4, L/16
-    ν = U*R/Re
-    norm2(x) = √sum(abs2,x)
-    body = AutoBody() do xyz, t
-        x, y, z = xyz - center
-        norm2(SA[x, norm2(SA[y, z]) - R]) - r
-    end
-    Simulation((2L, L, L), (U, 0, 0), R; ν, body, T=T, mem=backend)
-end
-
-function jelly(p, backend; Re=5e2, U=1, T=Float32)
-    n = 2^p; R = 2n/3; h = 4n - 2R; ν = U*R/Re
-    ω = 2U/R
-    @fastmath @inline A(t) = 1 .- SA[1,1,0]*0.1*cos(ω*t)
-    @fastmath @inline B(t) = SA[0,0,1]*((cos(ω*t) - 1)*R/4-h)
-    @fastmath @inline C(t) = SA[0,0,1]*sin(ω*t)*R/4
-    sphere = AutoBody((x,t)->abs(√sum(abs2, x) - R) - 1,
-                      (x,t)->A(t).*x + B(t) + C(t))
-    plane = AutoBody((x,t)->x[3] - h, (x, t) -> x + C(t))
-    body =  sphere - plane
-    Simulation((n, n, 4n), (0, 0, -U), R; ν, body, T=T, mem=backend)
-end
-
-# Generate benchmarks
-function run_benchmarks(cases, log2p, max_steps, ftype, backend, bstr; datadir="./")
-    for (case, p, s, ft) in zip(cases, log2p, max_steps, ftype)
-        println("Benchmarking: $(case)")
-        suite = BenchmarkGroup()
-        results = BenchmarkGroup([case, "sim_step!", p, s, ft, bstr, git_hash, string(VERSION)])
-        add_to_suite!(suite, getf(case); p=p, s=s, ft=ft, backend=backend, bstr=bstr,
-            remeasure=any(occursin.(["cylinder", "jelly"], case))
-        ) # create benchmark
-        results[bstr] = run(suite[bstr], samples=1, evals=1, seconds=1e6, verbose=true) # run!
-        fname = "$(case)_$(p...)_$(s)_$(ft)_$(bstr)_$(git_hash)_$VERSION.json"
-        BenchmarkTools.save(joinpath(datadir,fname), results)
-    end
-end
-
-cases, log2p, max_steps, ftype, backend = parse_cla(ARGS;
-    cases=["tgv", "jelly"], log2p=[(6,7), (5,6)], max_steps=[100, 100], ftype=[Float32, Float32], backend=Array
-)
-
-# Generate benchmark data
-datadir = joinpath("data", git_hash)
-mkpath(datadir)
-run_benchmarks(cases, log2p, max_steps, ftype, backend, backend_str[backend]; datadir)
\ No newline at end of file
diff --git a/benchmark/benchmark.sh b/benchmark/benchmark.sh
deleted file mode 100755
index 999803c..0000000
--- a/benchmark/benchmark.sh
+++ /dev/null
@@ -1,211 +0,0 @@
-#!/bin/bash
-
-THIS_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-export WATERLILY_ROOT=$(dirname "${THIS_DIR}")
-export JULIA_NUM_THREADS="auto"
-
-# Utils
-join_array_comma () {
-    arr=("$@")
-    printf -v joined '%s,' $arr
-    echo "[${joined%,}]"
-}
-join_array_str_comma () {
-    arr=("$@")
-    printf -v joined '\"%s\",' $arr
-    echo "[${joined%,}]"
-}
-join_array_tuple_comma () {
-    arr=("$@")
-    printf -v joined '(%s),' $arr
-    echo "[${joined%,}]"
-}
-# Check if juliaup exists in environment
-check_if_juliaup () {
-    if [ command -v juliaup ] &> /dev/null || [ ! $JULIAUP ]
-    then # juliaup does not exist or $JULIAUP is false
-        return 1
-    else # run with juliaup
-        return 0
-    fi
-}
-# Grep current julia version
-julia_version () {
-    julia_v=($(julia -v))
-    echo "${julia_v[2]}"
-}
-# Get current WaterLily version
-waterlily_version () {
-    waterlily_v=($(git -C $WATERLILY_ROOT rev-parse --short HEAD))
-    echo "${waterlily_v}"
-}
-
-# Update project environment with new Julia version: Mark WaterLily as a development packag, then update dependencies and precompile.
-update_environment () {
-    if $WATERLILY_CHECKOUT; then
-        echo "Git checkout to WaterLily $wl_version"
-        cd $WATERLILY_ROOT
-        git checkout $wl_version
-        cd $THIS_DIR
-    fi
-    if check_if_juliaup; then
-        echo "Updating environment to Julia $version and compiling WaterLily"
-        julia +${version} --project=$THIS_DIR -e "using Pkg; Pkg.develop(PackageSpec(path=get(ENV, \"WATERLILY_ROOT\", \"\"))); Pkg.update();"
-    fi
-}
-
-run_benchmark () {
-    if check_if_juliaup; then
-        full_args=(+${version} --project=${THIS_DIR} --startup-file=no $args)
-    else
-        full_args=(--project=${THIS_DIR} --startup-file=no $args)
-    fi
-
-    echo "Running: julia ${full_args[@]}"
-    julia "${full_args[@]}"
-}
-
-# Print benchamrks info
-display_info () {
-    echo "--------------------------------------"
-    echo "Running benchmark tests for:
- - Julia:        ${VERSIONS[@]}
- - Backends:     ${BACKENDS[@]}"
-    if [[ " ${BACKENDS[*]} " =~ [[:space:]]'Array'[[:space:]] ]]; then
-        echo " - CPU threads:  ${THREADS[@]}"
-    fi
-    echo " - Cases:        ${CASES[@]}
- - Size:         ${LOG2P[@]:0:$NCASES}
- - Sim. steps:   ${MAXSTEPS[@]:0:$NCASES}
- - Data type:    ${FTYPE[@]:0:$NCASES}"
-    echo "--------------------------------------"; echo
-}
-
-# Default backends
-JULIAUP=true
-JULIA_USER_VERSION=$(julia_version)
-WL_CURRENT_VERSION=$(waterlily_version)
-WL_VERSIONS=()
-BACKENDS=('Array' 'CuArray')
-THREADS=('1' '4')
-# Default cases. Arrays below must be same length (specify each case individually)
-CASES=('tgv' 'jelly')
-LOG2P=('6,7' '5,6')
-MAXSTEPS=('100' '100')
-FTYPE=('Float32' 'Float32')
-
-# Parse arguments
-while [ $# -gt 0 ]; do
-case "$1" in
-    --juliaup|-ju)
-    JULIAUP=($2)
-    shift
-    ;;
-    --waterlily|-w)
-    WL_VERSIONS=($2)
-    shift
-    ;;
-    --versions|-v)
-    VERSIONS=($2)
-    shift
-    ;;
-    --backends|-b)
-    BACKENDS=($2)
-    shift
-    ;;
-    --threads|-t)
-    THREADS=($2)
-    shift
-    ;;
-    --cases|-c)
-    CASES=($2)
-    shift
-    ;;
-    --log2p|-p)
-    LOG2P=($2)
-    shift
-    ;;
-    --max_steps|-s)
-    MAXSTEPS=($2)
-    shift
-    ;;
-    --float_type|-ft)
-    FTYPE=($2)
-    shift
-    ;;
-    *)
-    printf "ERROR: Invalid argument %s\n" "${1}" 1>&2
-    exit 1
-esac
-shift
-done
-
-# Assert "--threads" argument is not empy if "Array" backend is present
-if [[ " ${BACKENDS[*]} " =~ [[:space:]]'Array'[[:space:]] ]]; then
-    if [ "${#THREADS[@]}" == 0 ]; then
-        echo "ERROR: Backend 'Array' is present, but '--threads' argument is empty."
-        exit 1
-    fi
-fi
-
-# Assert all case arguments have equal size
-NCASES=${#CASES[@]}
-NLOG2P=${#LOG2P[@]}
-NMAXSTEPS=${#MAXSTEPS[@]}
-NFTYPE=${#FTYPE[@]}
-st=0
-for i in $NLOG2P $NMAXSTEPS $NFTYPE; do
-    [ "$NCASES" = "$i" ]
-    st=$(( $? + st ))
-done
-if [ $st != 0 ]; then
-    echo "ERROR: Case arguments are arrays of different sizes."
-    exit 1
-fi
-
-# Check if specific WaterLily version have been specified
-if (( ${#WL_VERSIONS[@]} != 0 )); then
-    WATERLILY_CHECKOUT=true
-else
-    WATERLILY_CHECKOUT=false
-    WL_VERSIONS=($WL_CURRENT_VERSION)
-fi
-
-# Display information
-display_info
-
-# Join arrays
-CASES=$(join_array_str_comma "${CASES[*]}")
-LOG2P=$(join_array_tuple_comma "${LOG2P[*]}")
-MAXSTEPS=$(join_array_comma "${MAXSTEPS[*]}")
-FTYPE=$(join_array_comma "${FTYPE[*]}")
-args_cases="--cases=$CASES --log2p=$LOG2P --max_steps=$MAXSTEPS --ftype=$FTYPE"
-
-# Benchmarks
-for version in "${VERSIONS[@]}" ; do
-    if check_if_juliaup; then
-        echo "Julia $version benchmarks"
-    else
-        echo "Running with default Julia version $( julia_version ) from $( which julia )"
-    fi
-    for wl_version in "${WL_VERSIONS[@]}" ; do
-        update_environment
-        for backend in "${BACKENDS[@]}" ; do
-            if [ "${backend}" == "Array" ]; then
-                for thread in "${THREADS[@]}" ; do
-                    args="-t $thread ${THIS_DIR}/benchmark.jl --backend=$backend $args_cases"
-                    run_benchmark
-                done
-            else
-                args="${THIS_DIR}/benchmark.jl --backend=$backend $args_cases"
-                run_benchmark
-            fi
-        done
-    done
-    if ! check_if_juliaup; then
-        break
-    fi # if no juliaup, we only test default Julia version
-done
-
-echo "All done!"
-exit 0
diff --git a/benchmark/compare.jl b/benchmark/compare.jl
deleted file mode 100644
index 3001fc1..0000000
--- a/benchmark/compare.jl
+++ /dev/null
@@ -1,115 +0,0 @@
-# Run with
-# julia --project compare.jl --dir="data" --plot="plots" --patterns=["tgv","sphere","cylinder"] --sort=1
-# julia --project compare.jl  --plot="plots" --sort=1 $(find data/ \( -name "tgv*json" -o -name "sphere*json" -o -name "cylinder*json" \) -printf "%T@ %Tc %p\n" | sort -n | awk '{print $7}')
-
-using BenchmarkTools, PrettyTables
-include("util.jl")
-
-# Parse CLA and load benchmarks
-sort_idx = !isnothing(iarg("sort", ARGS)) ? arg_value("sort", ARGS) |> metaparse : 0
-plotdir = !isnothing(iarg("plotdir", ARGS)) ? arg_value("plotdir", ARGS) : nothing
-datadir = !isnothing(iarg("datadir", ARGS)) ? arg_value("datadir", ARGS) : false
-patterns = !isnothing(iarg("patterns", ARGS)) ? arg_value("patterns", ARGS) |> parsepatterns |> metaparse : String["tgv", "sphere", "cylinder"]
-!isa(datadir, String) && !isnothing(iarg("cases", ARGS)) && @error "Data directory needed if --cases are passed as command line argument."
-benchmarks_list = isa(datadir, AbstractString) ? rdir(datadir, patterns) : [f for f in ARGS if !any(occursin.(["--sort","--datadir","--plotdir"], f))]
-println("Processing the following benchmarks:")
-for f in benchmarks_list
-    println("    ", f)
-end
-benchmarks_all = [BenchmarkTools.load(f)[1] for f in benchmarks_list]
-
-# Separate benchmarks by test case
-all_cases = String["tgv", "sphere", "cylinder", "jelly"]
-cases_ordered = all_cases[filter(x -> !isnothing(x),[findfirst(x->x==1, contains.(p, all_cases)) for p in patterns])]
-length(cases_ordered) == 0 && (cases_ordered = all_cases)
-cases_str = [b.tags[1] for b in benchmarks_all] |> unique
-benchmarks_all_dict = Dict(Pair{String, Vector{BenchmarkGroup}}(k, []) for k in cases_str)
-for b in benchmarks_all
-    push!(benchmarks_all_dict[b.tags[1]], b)
-end
-
-# Table and plots
-!isa(plotdir, Nothing) &&  mkpath(plotdir)
-for (kk, case) in enumerate(cases_ordered)
-    benchmarks = benchmarks_all_dict[case]
-    # Get backends string vector and assert same case sizes for the different backends
-    backends_str = [String.(k)[1] for k in keys.(benchmarks)]
-    log2p_str = [String.(keys(benchmarks[i][backend_str])) for (i, backend_str) in enumerate(backends_str)]
-    length(unique(log2p_str)) != 1 && @error "Case sizes missmatch."
-    log2p_str = sort(log2p_str[1])
-    f_test = benchmarks[1].tags[2]
-    # Get data for PrettyTables
-    header = ["Backend", "WaterLily", "Julia", "Precision", "Allocations", "GC [%]", "Time [s]", "Cost [ns/DOF/dt]", "Speed-up"]
-    data, base_speedup = Matrix{Any}(undef, length(benchmarks), length(header)), 1.0
-    # plotting_dir := Dict[("WaterLily version", "Julia version", "precision")][backend, log2p, {3}] # times, cost, speedups
-    plotting_dict = Dict{NTuple, Array{Float64}}()
-
-    printstyled("Benchmark environment: $case $f_test (max_steps=$(benchmarks[1].tags[4]))\n", bold=true)
-    for (k, n) in enumerate(log2p_str)
-        printstyled("▶ log2p = $n\n", bold=true)
-        for (i, benchmark) in enumerate(benchmarks)
-            datap = benchmark[backends_str[i]][n][f_test]
-            speedup = i == 1 ? 1.0 : benchmarks[1][backends_str[1]][n][f_test].times[1] / datap.times[1]
-            N = prod(tests_dets[case]["size"]) .* 2 .^ (3 .* eval(Meta.parse.(n)))
-            cost = datap.times[1] / N / benchmarks[1].tags[4]
-            data[i, :] .= [backends_str[i], benchmark.tags[end-1], benchmark.tags[end], benchmark.tags[end-3],
-                datap.allocs, (datap.gctimes[1] / datap.times[1]) * 100.0, datap.times[1] / 1e9, cost, speedup]
-            versions_key = (benchmark.tags[end-1], benchmark.tags[end], benchmark.tags[end-3])
-            backend_idx = findall(x -> x == backends_str[i], unique(backends_str))[1]
-            !(versions_key in keys(plotting_dict)) &&
-                (plotting_dict[versions_key] = zeros(length(log2p_str), length(unique(backends_str)), 3))
-            plotting_dict[versions_key][k, backend_idx, :] = data[i, end-2:end]
-        end
-        sorted_cond, sorted_idx = 0 < sort_idx <= length(header), nothing
-        if sorted_cond
-            sorted_idx = sortperm(data[:, sort_idx])
-            baseline_idx = findfirst(x->x==1, sorted_idx)
-            data .= data[sorted_idx, :]
-        end
-        hl_base = Highlighter(f=(data, i, j) -> sorted_cond ? i == findfirst(x->x==1, sorted_idx) : i==1,
-            crayon=Crayon(foreground=:blue))
-        hl_fast = Highlighter(f=(data, i, j) -> i == argmin(data[:, end-1]), crayon=Crayon(foreground=(32,125,56)))
-        pretty_table(data; header=header, header_alignment=:c, highlighters=(hl_base, hl_fast), formatters=ft_printf("%.2f", [6,7,8,9]))
-    end
-
-    # Plotting each configuration of WaterLily version, Julia version and precision in benchamarks
-    if !isa(plotdir, Nothing)
-        # Get cases size
-        N = prod(tests_dets[case]["size"]) .* 2 .^ (3 .* eval(Meta.parse.(log2p_str)))
-        N_str = (N./1e6) .|> x -> @sprintf("%.2f", x)
-        unique_backends_str = unique(backends_str)
-
-        for (k, data_plot) in plotting_dict
-            versions_key = join(k, '_')
-            # Cost plot
-            p_cost = plot()
-            for (i, bstr) in enumerate(unique_backends_str)
-                scatter!(p_cost, N./1e6, data_plot[:, i, 2], label=unique_backends_str[i], ms=10, ma=1)
-            end
-            scatter!(p_cost, yaxis=:log10, xaxis=:log10, yminorgrid=true, xminorgrid=true,
-                ylims=(1, 1000), xlims=(0.1, 600),
-                xlabel="DOF [M]", lw=0, framestyle=:box, grid=:xy, size=(600, 600),
-                left_margin=Plots.Measures.Length(:mm, 5), right_margin=Plots.Measures.Length(:mm, 5),
-                ylabel="Cost [ns/DOF/dt]", title=tests_dets[case]["title"], legend=:bottomleft
-            )
-            fancylogscale!(p_cost)
-            savefig(p_cost, joinpath(string(@__DIR__), plotdir, "$(case)_cost_$(versions_key).pdf"))
-
-            # Speedup plot
-            groups = repeat(N_str, inner=length(unique_backends_str)) |> CategoricalArray
-            levels!(groups, N_str)
-            ctg = repeat(unique_backends_str, outer=length(log2p_str)) |> CategoricalArray
-            levels!(ctg, unique_backends_str)
-            p = annotated_groupedbar(groups, transpose(data_plot[:, :, 1]), ctg;
-                series_annotations=vec(transpose(data_plot[:, :, 3])) .|> x -> @sprintf("%d", x) .|> latexstring, bar_width=0.92,
-                Dict(:xlabel=>"DOF [M]", :title=>tests_dets[case]["title"],
-                    :ylims=>(1e-1, 1e5), :lw=>0, :framestyle=>:box, :yaxis=>:log10, :grid=>true,
-                    :color=>reshape(palette([:cyan, :green], length(unique_backends_str))[1:length(unique_backends_str)], (1, length(unique_backends_str))),
-                    :size=>(600, 600)
-                )...
-            )
-            plot!(p, ylabel="Time [s]", legend=:topleft, left_margin=Plots.Measures.Length(:mm, 0))
-            savefig(p, joinpath(string(@__DIR__), plotdir, "$(case)_benchmark_$(versions_key).pdf"))
-        end
-    end
-end
\ No newline at end of file
diff --git a/benchmark/util.jl b/benchmark/util.jl
deleted file mode 100644
index 60b9608..0000000
--- a/benchmark/util.jl
+++ /dev/null
@@ -1,202 +0,0 @@
-using Plots, StatsPlots, LaTeXStrings, CategoricalArrays, Printf, ColorSchemes
-
-iarg(arg, args) = occursin.(arg, args) |> findfirst
-arg_value(arg, args) = split(args[iarg(arg, args)], "=")[end]
-metaparse(x) = eval(Meta.parse(x))
-parsepatterns(x) = replace(x,","=>("\",\""),"["=>("[\""),"]"=>("\"]"))
-
-function parse_cla(args; cases=["tgv"], log2p=[(6,7)], max_steps=[100], ftype=[Float32], backend=Array)
-    cases = !isnothing(iarg("cases", args)) ? arg_value("cases", args) |> metaparse : cases
-    log2p = !isnothing(iarg("log2p", args)) ? arg_value("log2p", args) |> metaparse : log2p
-    max_steps = !isnothing(iarg("max_steps", args)) ? arg_value("max_steps", args) |> metaparse : max_steps
-    ftype = !isnothing(iarg("ftype", args)) ? arg_value("ftype", args) |> metaparse : ftype
-    backend = !isnothing(iarg("backend", args)) ? arg_value("backend", args) |> x -> eval(Symbol(x)) : backend
-    return cases, log2p, max_steps, ftype, backend
-end
-
-macro add_benchmark(args...)
-    ex, b, suite, label = args
-    return quote
-        $suite[$label] = @benchmarkable begin
-            $ex
-            synchronize($b)
-        end
-    end |> esc
-end
-
-function add_to_suite!(suite, sim_function; p=(3,4,5), s=100, ft=Float32, backend=Array, bstr="CPU", remeasure=false)
-    suite[bstr] = BenchmarkGroup([bstr])
-    for n in p
-        sim = sim_function(n, backend; T=ft)
-        sim_step!(sim, typemax(ft); max_steps=5, verbose=false, remeasure=remeasure) # warm up
-        suite[bstr][repr(n)] = BenchmarkGroup([repr(n)])
-        KA_backend = get_backend(sim.flow.p)
-        @add_benchmark sim_step!($sim, $typemax($ft); max_steps=$s, verbose=false, remeasure=$remeasure) $KA_backend suite[bstr][repr(n)] "sim_step!"
-    end
-end
-
-waterlily_dir = get(ENV, "WATERLILY_ROOT", "")
-git_hash = read(`git -C $waterlily_dir rev-parse --short HEAD`, String) |> x -> strip(x, '\n')
-getf(str) = eval(Symbol(str))
-
-backend_str = Dict(Array => "CPUx"*@sprintf("%.2d", Threads.nthreads()))
-check_compiler(compiler,parse_str) = try occursin(parse_str, read(`$compiler --version`, String)) catch _ false end
-_cuda = check_compiler("nvcc","release")
-_rocm = check_compiler("hipcc","version")
-_cuda && (using CUDA: CuArray; backend_str[CuArray] = "CUDA")
-_rocm && (using AMDGPU: ROCArray; backend_str[ROCArray] = "ROCm")
-(_cuda || _rocm) && (using GPUArrays: allowscalar; allowscalar(false))
-
-# Plotting utils
-using Plots
-
-fontsize = 14
-speedup_fontsize = 14
-Plots.default(
-    fontfamily = "Computer Modern",
-    linewidth = 1,
-    framestyle = :box,
-    grid = false,
-    left_margin = Plots.Measures.Length(:mm, 24),
-    right_margin = Plots.Measures.Length(:mm, 0),
-    bottom_margin = Plots.Measures.Length(:mm, 5),
-    top_margin = Plots.Measures.Length(:mm, 5),
-    legendfontsize = fontsize,
-    tickfontsize = fontsize,
-    labelfontsize = fontsize,
-)
-
-# Fancy logarithmic scale ticks for plotting
-# https://github.com/JuliaPlots/Plots.jl/issues/3318
-"""
-    get_tickslogscale(lims; skiplog=false)
-Return a tuple (ticks, ticklabels) for the axis limit `lims`
-where multiples of 10 are major ticks with label and minor ticks have no label
-skiplog argument should be set to true if `lims` is already in log scale.
-"""
-function get_tickslogscale(lims::Tuple{T, T}; skiplog::Bool=false) where {T<:AbstractFloat}
-    mags = if skiplog
-        # if the limits are already in log scale
-        floor.(lims)
-    else
-        floor.(log10.(lims))
-    end
-    rlims = if skiplog; 10 .^(lims) else lims end
-
-    total_tickvalues = []
-    total_ticknames = []
-
-    rgs = range(mags..., step=1)
-    for (i, m) in enumerate(rgs)
-        if m >= 0
-            tickvalues = range(Int(10^m), Int(10^(m+1)); step=Int(10^m))
-            ticknames  = vcat([string(round(Int, 10^(m)))],
-                              ["" for i in 2:9],
-                              [string(round(Int, 10^(m+1)))])
-        else
-            tickvalues = range(10^m, 10^(m+1); step=10^m)
-            ticknames  = vcat([string(10^(m))], ["" for i in 2:9], [string(10^(m+1))])
-        end
-
-        if i==1
-            # lower bound
-            indexlb = findlast(x->x<rlims[1], tickvalues)
-            if isnothing(indexlb); indexlb=1 end
-        else
-            indexlb = 1
-        end
-        if i==length(rgs)
-            # higher bound
-            indexhb = findfirst(x->x>rlims[2], tickvalues)
-            if isnothing(indexhb); indexhb=10 end
-        else
-            # do not take the last index if not the last magnitude
-            indexhb = 9
-        end
-
-        total_tickvalues = vcat(total_tickvalues, tickvalues[indexlb:indexhb])
-        total_ticknames = vcat(total_ticknames, ticknames[indexlb:indexhb])
-    end
-    return (total_tickvalues[1:end-1], total_ticknames[1:end-1])
-end
-
-"""
-    fancylogscale!(p; forcex=false, forcey=false)
-Transform the ticks to log scale for the axis with scale=:log10.
-forcex and forcey can be set to true to force the transformation
-if the variable is already expressed in log10 units.
-"""
-function fancylogscale!(p::Plots.Subplot; forcex::Bool=false, forcey::Bool=false)
-    kwargs = Dict()
-    for (ax, force, lims) in zip((:x, :y), (forcex, forcey), (xlims, ylims))
-        axis = Symbol("$(ax)axis")
-        ticks = Symbol("$(ax)ticks")
-
-        if force || p.attr[axis][:scale] == :log10
-            # Get limits of the plot and convert to Float
-            ls = float.(lims(p))
-            ts = if force
-                (vals, labs) = get_tickslogscale(ls; skiplog=true)
-                (log10.(vals), labs)
-            else
-                get_tickslogscale(ls)
-            end
-            kwargs[ticks] = ts
-        end
-    end
-
-    if length(kwargs) > 0
-        plot!(p; kwargs...)
-    end
-    p
-end
-fancylogscale!(p::Plots.Plot; kwargs...) = (fancylogscale!(p.subplots[1]; kwargs...); return p)
-fancylogscale!(; kwargs...) = fancylogscale!(plot!(); kwargs...)
-
-function Base.unique(ctg::CategoricalArray)
-    l = levels(ctg)
-    newctg = CategoricalArray(l)
-    levels!(newctg, l)
-end
-
-function annotated_groupedbar(xx, yy, group; series_annotations="", bar_width=1.0, plot_kwargs...)
-    gp = groupedbar(xx, yy, group=group, series_annotations="", bar_width=bar_width; plot_kwargs...)
-    m = length(unique(group))       # number of items per group
-    n = length(unique(xx))          # number of groups
-    xt = (1:n) .- 0.5               # plot x-coordinate of groups' centers
-    dx = bar_width/m                # each group occupies bar_width units along x
-    # dy = diff([extrema(yy)...])[1]
-    x2 = [xt[i] + (j - m/2 - 0.3)*dx for j in 1:m, i in 1:n][:]
-    k = 1
-    for i in 1:n, j in 1:m
-        y0 = gp[1][2j][:y][i]*1.3# + 0.04*dy
-        if isfinite(y0)
-            annotate!(x2[(i-1)*m + j]*1.01, y0, text(series_annotations[k], :center, :black, speedup_fontsize))
-            k += 1
-        end
-    end
-    gp
-end
-
-# Find files utils
-using Glob
-function rdir(dir, patterns)
-    results = String[]
-    patterns = [Glob.FilenameMatch("*" * p * "*") for p in patterns]
-    for (root, _, files) in walkdir(dir)
-        fpaths = joinpath.(root, files)
-        length(fpaths) == 0 && continue
-        matches = [filter(x -> occursin(p, x), fpaths) for p in patterns]
-        push!(results, vcat(matches...)...)
-    end
-    results
-end
-
-# Benchmark sizes
-tests_dets = Dict(
-    "tgv" => Dict("size" => (1, 1, 1), "title" => "TGV"),
-    "sphere" => Dict("size" => (16, 6, 6), "title" => "Sphere"),
-    "cylinder" => Dict("size" => (12, 6, 2), "title" => "Moving cylinder"),
-    "donut" => Dict("size" => (2, 1, 1), "title" => "Donut"),
-    "jelly" => Dict("size" => (1, 1, 4), "title" => "Jelly"),
-)