-SELECT 500, 500 thousand, and 500 million rows (original)
-
-
-$ MIX_ENV=bench mix run bench/stream.exs
-
-This benchmark is based on https://github.com/ClickHouse/ch-bench
-
-Operating System: macOS
-CPU Information: Apple M1
-Number of Available Cores: 8
-Available memory: 8 GB
-Elixir 1.14.4
-Erlang 25.3
-
-Benchmark suite executing with the following configuration:
-warmup: 2 s
-time: 5 s
-memory time: 0 ns
-reduction time: 0 ns
-parallel: 1
-inputs: 500 rows, 500_000 rows, 500_000_000 rows
-Estimated total run time: 1.05 min
-
-Benchmarking stream with decode with input 500 rows ...
-Benchmarking stream with decode with input 500_000 rows ...
-Benchmarking stream with decode with input 500_000_000 rows ...
-Benchmarking stream with manual decode with input 500 rows ...
-Benchmarking stream with manual decode with input 500_000 rows ...
-Benchmarking stream with manual decode with input 500_000_000 rows ...
-Benchmarking stream without decode with input 500 rows ...
-Benchmarking stream without decode with input 500_000 rows ...
-Benchmarking stream without decode with input 500_000_000 rows ...
-
-##### With input 500 rows #####
-Name                                ips        average  deviation         median         99th %
-stream with decode               4.69 K      213.34 μs    ±12.49%      211.38 μs      290.94 μs
-stream with manual decode        4.69 K      213.43 μs    ±17.40%      210.96 μs      298.75 μs
-stream without decode            4.65 K      215.08 μs    ±10.79%      213.79 μs      284.66 μs
-
-Comparison:
-stream with decode               4.69 K
-stream with manual decode        4.69 K - 1.00x slower +0.0838 μs
-stream without decode            4.65 K - 1.01x slower +1.74 μs
-
-##### With input 500_000 rows #####
-Name                                ips        average  deviation         median         99th %
-stream without decode            234.58        4.26 ms    ±13.99%        4.04 ms        5.95 ms
-stream with manual decode         64.26       15.56 ms     ±8.36%       15.86 ms       17.97 ms
-stream with decode                41.03       24.37 ms     ±6.27%       24.39 ms       26.60 ms
-
-Comparison:
-stream without decode            234.58
-stream with manual decode         64.26 - 3.65x slower +11.30 ms
-stream with decode                41.03 - 5.72x slower +20.11 ms
-
-##### With input 500_000_000 rows #####
-Name                                ips        average  deviation         median         99th %
-stream without decode              0.32         3.17 s     ±0.20%         3.17 s         3.17 s
-stream with manual decode        0.0891        11.23 s     ±0.00%        11.23 s        11.23 s
-stream with decode               0.0462        21.66 s     ±0.00%        21.66 s        21.66 s
-
-Comparison:
-stream without decode              0.32
-stream with manual decode        0.0891 - 3.55x slower +8.06 s
-stream with decode               0.0462 - 6.84x slower +18.50 s
-
-
-
-
-[CI Results](https://github.com/plausible/ch/actions/workflows/bench.yml) (click the latest workflow run and scroll down to "Artifacts")
+## [Benchmarks](./bench)
+
+See nightly [CI runs](https://github.com/plausible/ch/actions/workflows/bench.yml) for the latest results.
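+
+To run a benchmark locally (assuming a ClickHouse server is reachable, by
+default over HTTP on localhost:8123), use the `bench` Mix env:
+
+```console
+$ MIX_ENV=bench mix run bench/stream.exs
+```
+
+The scripts read `CH_SCHEME`, `CH_HOSTNAME`, and `CH_PORT` (and, for the
+insert benchmark, `CH_DATABASE`) from the environment to locate the server.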
diff --git a/bench/encode.exs b/bench/encode.exs
new file mode 100644
index 0000000..2729ebf
--- /dev/null
+++ b/bench/encode.exs
@@ -0,0 +1,27 @@
+IO.puts("""
+This benchmark measures the performance of encoding rows in RowBinary format.
+""")
+
+alias Ch.RowBinary
+
+types = ["UInt64", "String", "Array(UInt8)", "DateTime"]
+
+rows = fn count ->
+ Enum.map(1..count, fn i ->
+ [i, "Golang SQL database driver", [1, 2, 3, 4, 5, 6, 7, 8, 9], DateTime.utc_now()]
+ end)
+end
+
+Benchee.run(
+ %{
+ "RowBinary" => fn rows -> RowBinary.encode_rows(rows, types) end,
+ "RowBinary stream" => fn rows ->
+      rows
+      |> Stream.chunk_every(60_000)
+      |> Stream.each(fn chunk -> RowBinary.encode_rows(chunk, types) end)
+      |> Stream.run()
+ end
+ },
+ inputs: %{
+ "1_000_000 (UInt64, String, Array(UInt8), DateTime) rows" => rows.(1_000_000)
+ }
+)
diff --git a/bench/insert.exs b/bench/insert.exs
index f6f01c4..71affee 100644
--- a/bench/insert.exs
+++ b/bench/insert.exs
@@ -1,54 +1,59 @@
-IO.puts("This benchmark is based on https://github.com/ClickHouse/clickhouse-go#benchmark\n")
+IO.puts("""
+This benchmark is based on https://github.com/ClickHouse/clickhouse-go#benchmark
+
+It tests how quickly a client can insert one million rows of the following schema:
+- col1 UInt64
+- col2 String
+- col3 Array(UInt8)
+- col4 DateTime
+""")
port = String.to_integer(System.get_env("CH_PORT") || "8123")
hostname = System.get_env("CH_HOSTNAME") || "localhost"
scheme = System.get_env("CH_SCHEME") || "http"
database = System.get_env("CH_DATABASE") || "ch_bench"
-{:ok, conn} = Ch.start_link(scheme: scheme, hostname: hostname, port: port)
-Ch.query!(conn, "CREATE DATABASE IF NOT EXISTS {$0:Identifier}", [database])
-
-Ch.query!(conn, """
-CREATE TABLE IF NOT EXISTS #{database}.benchmark (
- col1 UInt64,
- col2 String,
- col3 Array(UInt8),
- col4 DateTime
-) Engine Null
-""")
-
-types = [Ch.Types.u64(), Ch.Types.string(), Ch.Types.array(Ch.Types.u8()), Ch.Types.datetime()]
-statement = "INSERT INTO #{database}.benchmark FORMAT RowBinary"
+alias Ch.RowBinary
rows = fn count ->
Enum.map(1..count, fn i ->
- [i, "Golang SQL database driver", [1, 2, 3, 4, 5, 6, 7, 8, 9], NaiveDateTime.utc_now()]
+ [i, "Golang SQL database driver", [1, 2, 3, 4, 5, 6, 7, 8, 9], DateTime.utc_now()]
end)
end
-alias Ch.RowBinary
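+# FORMAT RowBinary tells ClickHouse to parse the INSERT body as
+# RowBinary-encoded rows rather than SQL literals.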
+statement = "INSERT INTO #{database}.benchmark FORMAT RowBinary"
+types = ["UInt64", "String", "Array(UInt8)", "DateTime"]
Benchee.run(
%{
- # "control" => fn rows -> Enum.each(rows, fn _row -> :ok end) end,
- "encode" => fn rows -> RowBinary.encode_rows(rows, types) end,
- "insert" => fn rows -> Ch.query!(conn, statement, rows, types: types) end,
- # "control stream" => fn rows -> rows |> Stream.chunk_every(60_000) |> Stream.run() end,
- "encode stream" => fn rows ->
- rows
- |> Stream.chunk_every(60_000)
- |> Stream.map(fn chunk -> RowBinary.encode_rows(chunk, types) end)
- |> Stream.run()
+ "Ch.query" => fn %{pool: pool, rows: rows} ->
+ Ch.query!(pool, statement, rows, types: types)
end,
- "insert stream" => fn rows ->
- stream =
- rows
- |> Stream.chunk_every(60_000)
+ "Ch.stream" => fn %{pool: pool, rows: rows} ->
+ DBConnection.run(pool, fn conn ->
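+        # Encode 100_000-row chunks outside the request and stream each chunk
+        # into the INSERT; encode: false marks the data as already-encoded
+        # RowBinary iodata.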
+        rows
+        |> Stream.chunk_every(100_000)
|> Stream.map(fn chunk -> RowBinary.encode_rows(chunk, types) end)
-
- Ch.query!(conn, statement, stream, encode: false)
+ |> Stream.into(Ch.stream(conn, statement, [], encode: false))
+ |> Stream.run()
+ end)
end
},
+ before_scenario: fn rows ->
+ {:ok, pool} = Ch.start_link(scheme: scheme, hostname: hostname, port: port, pool_size: 1)
+
+ Ch.query!(pool, "CREATE DATABASE IF NOT EXISTS {$0:Identifier}", [database])
+
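+    # The Null engine accepts and discards every inserted row, so this
+    # benchmark measures client-side encoding and HTTP transport rather than
+    # ClickHouse storage.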
+ Ch.query!(pool, """
+ CREATE TABLE IF NOT EXISTS #{database}.benchmark (
+ col1 UInt64,
+ col2 String,
+ col3 Array(UInt8),
+ col4 DateTime
+ ) Engine Null
+ """)
+
+ %{pool: pool, rows: rows}
+ end,
inputs: %{
"1_000_000 rows" => rows.(1_000_000)
}
diff --git a/bench/stream.exs b/bench/stream.exs
index 64901a8..77d7336 100644
--- a/bench/stream.exs
+++ b/bench/stream.exs
@@ -1,16 +1,34 @@
-IO.puts("This benchmark is based on https://github.com/ClickHouse/ch-bench\n")
+IO.puts("""
+This benchmark is based on https://github.com/ClickHouse/ch-bench
+
+It tests how quickly a client can select N rows from the system.numbers_mt table:
+
+ SELECT number FROM system.numbers_mt LIMIT {limit:UInt64} FORMAT RowBinary
+""")
port = String.to_integer(System.get_env("CH_PORT") || "8123")
hostname = System.get_env("CH_HOSTNAME") || "localhost"
scheme = System.get_env("CH_SCHEME") || "http"
-{:ok, conn} = Ch.start_link(scheme: scheme, hostname: hostname, port: port)
+limits = fn limits ->
+ Map.new(limits, fn limit ->
+ {"limit=#{limit}", limit}
+ end)
+end
Benchee.run(
%{
- "RowBinary stream without decode" => fn limit ->
+ # "Ch.query" => fn %{pool: pool, limit: limit} ->
+ # Ch.query!(
+ # pool,
+ # "SELECT number FROM system.numbers_mt LIMIT {limit:UInt64}",
+ # %{"limit" => limit},
+ # timeout: :infinity
+ # )
+ # end,
+ "Ch.stream w/o decoding (i.e. pass-through)" => fn %{pool: pool, limit: limit} ->
DBConnection.run(
- conn,
+ pool,
fn conn ->
conn
|> Ch.stream(
@@ -22,19 +40,17 @@ Benchee.run(
timeout: :infinity
)
end,
- "RowBinary stream with manual decode" => fn limit ->
+ "Ch.stream with manual RowBinary decoding" => fn %{pool: pool, limit: limit} ->
DBConnection.run(
- conn,
+ pool,
fn conn ->
conn
|> Ch.stream(
"SELECT number FROM system.numbers_mt LIMIT {limit:UInt64} FORMAT RowBinary",
%{"limit" => limit}
)
- |> Stream.map(fn %Ch.Result{data: data} ->
- data
- |> IO.iodata_to_binary()
- |> Ch.RowBinary.decode_rows([:u64])
+ |> Stream.each(fn %Ch.Result{data: data} ->
+ data |> IO.iodata_to_binary() |> Ch.RowBinary.decode_rows([:u64])
end)
|> Stream.run()
end,
@@ -42,9 +58,9 @@ Benchee.run(
)
end
},
- inputs: %{
- "500 rows" => 500,
- "500_000 rows" => 500_000,
- "500_000_000 rows" => 500_000_000
- }
+ before_scenario: fn limit ->
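+    # A fresh single-connection pool per scenario keeps runs isolated and
+    # avoids checkout contention skewing the timings.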
+ {:ok, pool} = Ch.start_link(scheme: scheme, hostname: hostname, port: port, pool_size: 1)
+ %{pool: pool, limit: limit}
+ end,
+ inputs: limits.([500, 500_000, 500_000_000])
)