From 834ea16c992c4a0cb75c15084c3801799a3edaf5 Mon Sep 17 00:00:00 2001 From: ruslandoga <67764432+ruslandoga@users.noreply.github.com> Date: Fri, 20 Dec 2024 21:08:32 +0700 Subject: [PATCH] run benchmarks nightly --- .github/workflows/bench.yml | 17 ++--- README.md | 121 +----------------------------------- bench/encode.exs | 27 ++++++++ bench/insert.exs | 69 ++++++++++---------- bench/stream.exs | 46 +++++++++----- 5 files changed, 107 insertions(+), 173 deletions(-) create mode 100644 bench/encode.exs diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 9cb0c86..92cce48 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -1,6 +1,9 @@ name: bench -on: workflow_dispatch +on: + workflow_dispatch: + schedule: + - cron: "42 9 * * *" jobs: benchee: @@ -41,10 +44,8 @@ jobs: - run: mix deps.get --only $MIX_ENV - run: mix compile --warnings-as-errors - - run: mkdir results - - run: mix run bench/insert.exs | tee results/insert.txt - - run: mix run bench/stream.exs | tee results/stream.txt - - uses: actions/upload-artifact@v4 - with: - name: results - path: results/*.txt + + # - run: mix run bench/cast.exs + - run: mix run bench/encode.exs + - run: mix run bench/insert.exs + - run: mix run bench/stream.exs diff --git a/README.md b/README.md index 54a3bd1..5afc2ae 100644 --- a/README.md +++ b/README.md @@ -270,121 +270,6 @@ taipei = DateTime.shift_zone!(utc, "Asia/Taipei") Ch.query!(pid, "INSERT INTO ch_datetimes(datetime) FORMAT RowBinary", [[naive], [utc], [taipei]], types: ["DateTime"]) ``` -## Benchmarks - -
-INSERT 1 million rows (original) - -

-$ MIX_ENV=bench mix run bench/insert.exs
-
-This benchmark is based on https://github.com/ClickHouse/clickhouse-go#benchmark
-
-Operating System: macOS
-CPU Information: Apple M1
-Number of Available Cores: 8
-Available memory: 8 GB
-Elixir 1.14.4
-Erlang 25.3
-
-Benchmark suite executing with the following configuration:
-warmup: 2 s
-time: 5 s
-memory time: 0 ns
-reduction time: 0 ns
-parallel: 1
-inputs: 1_000_000 rows
-Estimated total run time: 28 s
-
-Benchmarking encode with input 1_000_000 rows ...
-Benchmarking encode stream with input 1_000_000 rows ...
-Benchmarking insert with input 1_000_000 rows ...
-Benchmarking insert stream with input 1_000_000 rows ...
-
-##### With input 1_000_000 rows #####
-Name                    ips        average  deviation         median         99th %
-encode stream          1.63      612.96 ms    ±11.30%      583.03 ms      773.01 ms
-insert stream          1.22      819.82 ms     ±9.41%      798.94 ms      973.45 ms
-encode                 1.09      915.75 ms    ±44.13%      750.98 ms     1637.02 ms
-insert                 0.73     1373.84 ms    ±31.01%     1331.86 ms     1915.76 ms
-
-Comparison: 
-encode stream          1.63
-insert stream          1.22 - 1.34x slower +206.87 ms
-encode                 1.09 - 1.49x slower +302.79 ms
-insert                 0.73 - 2.24x slower +760.88 ms
-
- -
- -
-SELECT 500, 500 thousand, and 500 million rows (original) - -

-$ MIX_ENV=bench mix run bench/stream.exs
-
-This benchmark is based on https://github.com/ClickHouse/ch-bench
-
-Operating System: macOS
-CPU Information: Apple M1
-Number of Available Cores: 8
-Available memory: 8 GB
-Elixir 1.14.4
-Erlang 25.3
-
-Benchmark suite executing with the following configuration:
-warmup: 2 s
-time: 5 s
-memory time: 0 ns
-reduction time: 0 ns
-parallel: 1
-inputs: 500 rows, 500_000 rows, 500_000_000 rows
-Estimated total run time: 1.05 min
-
-Benchmarking stream with decode with input 500 rows ...
-Benchmarking stream with decode with input 500_000 rows ...
-Benchmarking stream with decode with input 500_000_000 rows ...
-Benchmarking stream with manual decode with input 500 rows ...
-Benchmarking stream with manual decode with input 500_000 rows ...
-Benchmarking stream with manual decode with input 500_000_000 rows ...
-Benchmarking stream without decode with input 500 rows ...
-Benchmarking stream without decode with input 500_000 rows ...
-Benchmarking stream without decode with input 500_000_000 rows ...
-
-##### With input 500 rows #####
-Name                                ips        average  deviation         median         99th %
-stream with decode               4.69 K      213.34 μs    ±12.49%      211.38 μs      290.94 μs
-stream with manual decode        4.69 K      213.43 μs    ±17.40%      210.96 μs      298.75 μs
-stream without decode            4.65 K      215.08 μs    ±10.79%      213.79 μs      284.66 μs
-
-Comparison:
-stream with decode               4.69 K
-stream with manual decode        4.69 K - 1.00x slower +0.0838 μs
-stream without decode            4.65 K - 1.01x slower +1.74 μs
-
-##### With input 500_000 rows #####
-Name                                ips        average  deviation         median         99th %
-stream without decode            234.58        4.26 ms    ±13.99%        4.04 ms        5.95 ms
-stream with manual decode         64.26       15.56 ms     ±8.36%       15.86 ms       17.97 ms
-stream with decode                41.03       24.37 ms     ±6.27%       24.39 ms       26.60 ms
-
-Comparison:
-stream without decode            234.58
-stream with manual decode         64.26 - 3.65x slower +11.30 ms
-stream with decode                41.03 - 5.72x slower +20.11 ms
-
-##### With input 500_000_000 rows #####
-Name                                ips        average  deviation         median         99th %
-stream without decode              0.32         3.17 s     ±0.20%         3.17 s         3.17 s
-stream with manual decode        0.0891        11.23 s     ±0.00%        11.23 s        11.23 s
-stream with decode               0.0462        21.66 s     ±0.00%        21.66 s        21.66 s
-
-Comparison:
-stream without decode              0.32
-stream with manual decode        0.0891 - 3.55x slower +8.06 s
-stream with decode               0.0462 - 6.84x slower +18.50 s
-
- -
- -[CI Results](https://github.com/plausible/ch/actions/workflows/bench.yml) (click the latest workflow run and scroll down to "Artifacts") +## [Benchmarks](./bench) + +See nightly [CI runs](https://github.com/plausible/ch/actions/workflows/bench.yml) for latest results. diff --git a/bench/encode.exs b/bench/encode.exs new file mode 100644 index 0000000..2729ebf --- /dev/null +++ b/bench/encode.exs @@ -0,0 +1,27 @@ +IO.puts(""" +This benchmark measures the performance of encoding rows in RowBinary format. +""") + +alias Ch.RowBinary + +types = ["UInt64", "String", "Array(UInt8)", "DateTime"] + +rows = fn count -> + Enum.map(1..count, fn i -> + [i, "Golang SQL database driver", [1, 2, 3, 4, 5, 6, 7, 8, 9], DateTime.utc_now()] + end) +end + +Benchee.run( + %{ + "RowBinary" => fn rows -> RowBinary.encode_rows(rows, types) end, + "RowBinary stream" => fn rows -> + Stream.chunk_every(rows, 60_000) + |> Stream.each(fn chunk -> RowBinary.encode_rows(chunk, types) end) + |> Stream.run() + end + }, + inputs: %{ + "1_000_000 (UInt64, String, Array(UInt8), DateTime) rows" => rows.(1_000_000) + } +) diff --git a/bench/insert.exs b/bench/insert.exs index f6f01c4..71affee 100644 --- a/bench/insert.exs +++ b/bench/insert.exs @@ -1,54 +1,59 @@ -IO.puts("This benchmark is based on https://github.com/ClickHouse/clickhouse-go#benchmark\n") +IO.puts(""" +This benchmark is based on https://github.com/ClickHouse/clickhouse-go#benchmark + +It tests how quickly a client can insert one million rows of the following schema: +- col1 UInt64 +- col2 String +- col3 Array(UInt8) +- col4 DateTime +""") port = String.to_integer(System.get_env("CH_PORT") || "8123") hostname = System.get_env("CH_HOSTNAME") || "localhost" scheme = System.get_env("CH_SCHEME") || "http" database = System.get_env("CH_DATABASE") || "ch_bench" -{:ok, conn} = Ch.start_link(scheme: scheme, hostname: hostname, port: port) -Ch.query!(conn, "CREATE DATABASE IF NOT EXISTS {$0:Identifier}", [database]) - -Ch.query!(conn, """ -CREATE TABLE IF NOT EXISTS #{database}.benchmark ( - col1 UInt64, - col2 String, - col3 Array(UInt8), - col4 DateTime -) Engine Null -""") - -types = [Ch.Types.u64(), Ch.Types.string(), Ch.Types.array(Ch.Types.u8()), Ch.Types.datetime()] -statement = "INSERT INTO #{database}.benchmark FORMAT RowBinary" +alias Ch.RowBinary rows = fn count -> Enum.map(1..count, fn i -> - [i, "Golang SQL database driver", [1, 2, 3, 4, 5, 6, 7, 8, 9], NaiveDateTime.utc_now()] + [i, "Golang SQL database driver", [1, 2, 3, 4, 5, 6, 7, 8, 9], DateTime.utc_now()] end) end -alias Ch.RowBinary +statement = "INSERT INTO #{database}.benchmark FORMAT RowBinary" +types = ["UInt64", "String", "Array(UInt8)", "DateTime"] Benchee.run( %{ - # "control" => fn rows -> Enum.each(rows, fn _row -> :ok end) end, - "encode" => fn rows -> RowBinary.encode_rows(rows, types) end, - "insert" => fn rows -> Ch.query!(conn, statement, rows, types: types) end, - # "control stream" => fn rows -> rows |> Stream.chunk_every(60_000) |> Stream.run() end, - "encode stream" => fn rows -> - rows - |> Stream.chunk_every(60_000) - |> Stream.map(fn chunk -> RowBinary.encode_rows(chunk, types) end) - |> Stream.run() + "Ch.query" => fn %{pool: pool, rows: rows} -> + Ch.query!(pool, statement, rows, types: types) end, - "insert stream" => fn rows -> - stream = - rows - |> Stream.chunk_every(60_000) + "Ch.stream" => fn %{pool: pool, rows: rows} -> + DBConnection.run(pool, fn conn -> + Stream.chunk_every(rows, 100_000) |> Stream.map(fn chunk -> RowBinary.encode_rows(chunk, types) end) - - Ch.query!(conn, statement, stream, encode: false) + |> Stream.into(Ch.stream(conn, statement, [], encode: false)) + |> Stream.run() + end) end }, + before_scenario: fn rows -> + {:ok, pool} = Ch.start_link(scheme: scheme, hostname: hostname, port: port, pool_size: 1) + + Ch.query!(pool, "CREATE DATABASE IF NOT EXISTS {$0:Identifier}", [database]) + + Ch.query!(pool, """ + CREATE TABLE IF NOT EXISTS #{database}.benchmark ( + col1 UInt64, + col2 String, + col3 Array(UInt8), + col4 DateTime + ) Engine Null + """) + + %{pool: pool, rows: rows} + end, inputs: %{ "1_000_000 rows" => rows.(1_000_000) } diff --git a/bench/stream.exs b/bench/stream.exs index 64901a8..77d7336 100644 --- a/bench/stream.exs +++ b/bench/stream.exs @@ -1,16 +1,34 @@ -IO.puts("This benchmark is based on https://github.com/ClickHouse/ch-bench\n") +IO.puts(""" +This benchmark is based on https://github.com/ClickHouse/ch-bench + +It tests how quickly a client can select N rows from the system.numbers_mt table: + + SELECT number FROM system.numbers_mt LIMIT {limit:UInt64} FORMAT RowBinary +""") port = String.to_integer(System.get_env("CH_PORT") || "8123") hostname = System.get_env("CH_HOSTNAME") || "localhost" scheme = System.get_env("CH_SCHEME") || "http" -{:ok, conn} = Ch.start_link(scheme: scheme, hostname: hostname, port: port) +limits = fn limits -> + Map.new(limits, fn limit -> + {"limit=#{limit}", limit} + end) +end Benchee.run( %{ - "RowBinary stream without decode" => fn limit -> + # "Ch.query" => fn %{pool: pool, limit: limit} -> + # Ch.query!( + # pool, + # "SELECT number FROM system.numbers_mt LIMIT {limit:UInt64}", + # %{"limit" => limit}, + # timeout: :infinity + # ) + # end, + "Ch.stream w/o decoding (i.e. pass-through)" => fn %{pool: pool, limit: limit} -> DBConnection.run( - conn, + pool, fn conn -> conn |> Ch.stream( @@ -22,19 +40,17 @@ Benchee.run( timeout: :infinity ) end, - "RowBinary stream with manual decode" => fn limit -> + "Ch.stream with manual RowBinary decoding" => fn %{pool: pool, limit: limit} -> DBConnection.run( - conn, + pool, fn conn -> conn |> Ch.stream( "SELECT number FROM system.numbers_mt LIMIT {limit:UInt64} FORMAT RowBinary", %{"limit" => limit} ) - |> Stream.map(fn %Ch.Result{data: data} -> - data - |> IO.iodata_to_binary() - |> Ch.RowBinary.decode_rows([:u64]) + |> Stream.each(fn %Ch.Result{data: data} -> + data |> IO.iodata_to_binary() |> Ch.RowBinary.decode_rows([:u64]) end) |> Stream.run() end, @@ -42,9 +58,9 @@ Benchee.run( ) end }, - inputs: %{ - "500 rows" => 500, - "500_000 rows" => 500_000, - "500_000_000 rows" => 500_000_000 - } + before_scenario: fn limit -> + {:ok, pool} = Ch.start_link(scheme: scheme, hostname: hostname, port: port, pool_size: 1) + %{pool: pool, limit: limit} + end, + inputs: limits.([500, 500_000, 500_000_000]) )