diff --git a/README.md b/README.md index c9d900d..a31d600 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ A collection of data required to back our own ClickHouse official [Blogs](clickh A [list](./docker-compose-recipes/README.md) of ClickHouse recipes using docker compose: - Clickhouse single node with Keeper +- ClickHouse single node with Keeper and IMDB dataset - ClickHouse and Grafana - ClickHouse and MSSQL Server 2022 - ClickHouse and MinIO S3 diff --git a/docker-compose-recipes/README.md b/docker-compose-recipes/README.md index dcb3b9b..5aa480c 100644 --- a/docker-compose-recipes/README.md +++ b/docker-compose-recipes/README.md @@ -3,6 +3,7 @@ A list of ClickHouse docker compose recipes - [Clickhouse single node with Keeper](./recipes/ch-1S_1K/README.md) +- [ClickHouse single node with Keeper and IMDB dataset](./recipes/ch-1S_1K_IMDB_dataset/README.md) - [ClickHouse and Grafana](./recipes/ch-and-grafana/README.md) - [ClickHouse and MSSQL Server 2022](./recipes/ch-and-mssql/README.md) - [ClickHouse and MinIO S3](./recipes/ch-and-minio-S3/README.md) diff --git a/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/README.md b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/README.md new file mode 100644 index 0000000..1a2aa41 --- /dev/null +++ b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/README.md @@ -0,0 +1,15 @@ +# ClickHouse ch-1S_1K_IMDB_dataset + +Single-node ClickHouse instance with 1 ClickHouse Keeper and the IMDB dataset + +By default, the version of ClickHouse used will be `latest`, and ClickHouse Keeper +will be `latest-alpine`. You can pin specific versions by setting the `CHVER` and `CHKVER` environment +variables before running `docker compose up`. + +This recipe automates loading the [IMDB dataset](https://en.wikipedia.org/wiki/IMDb), following the steps illustrated [here](https://clickhouse.com/docs/en/integrations/dbt#prepare-clickhouse). 
+ +```bash +export CHVER=23.4 +export CHKVER=23.4-alpine +docker compose up +``` diff --git a/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/docker-compose.yaml b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/docker-compose.yaml new file mode 100644 index 0000000..591276b --- /dev/null +++ b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/docker-compose.yaml @@ -0,0 +1,25 @@ +version: '3.8' +services: + clickhouse: + image: 'clickhouse/clickhouse-server:${CHVER:-latest}' + user: '101:101' + container_name: clickhouse + hostname: clickhouse + volumes: + - ${PWD}/fs/volumes/clickhouse/etc/clickhouse-server/config.d/config.xml:/etc/clickhouse-server/config.d/config.xml + - ${PWD}/fs/volumes/clickhouse/etc/clickhouse-server/users.d/users.xml:/etc/clickhouse-server/users.d/users.xml + - ${PWD}/fs/volumes/clickhouse/docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d + ports: + - '127.0.0.1:8123:8123' + - '127.0.0.1:9000:9000' + depends_on: + - clickhouse-keeper + clickhouse-keeper: + image: 'clickhouse/clickhouse-keeper:${CHKVER:-latest-alpine}' + user: '101:101' + container_name: clickhouse-keeper + hostname: clickhouse-keeper + volumes: + - ${PWD}/fs/volumes/clickhouse-keeper/etc/clickhouse-keeper/keeper_config.xml:/etc/clickhouse-keeper/keeper_config.xml + ports: + - '127.0.0.1:9181:9181' diff --git a/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse-keeper/etc/clickhouse-keeper/keeper_config.xml b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse-keeper/etc/clickhouse-keeper/keeper_config.xml new file mode 100644 index 0000000..e0c97dd --- /dev/null +++ b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse-keeper/etc/clickhouse-keeper/keeper_config.xml @@ -0,0 +1,28 @@ + + + information + /var/log/clickhouse-keeper/clickhouse-keeper.log + /var/log/clickhouse-keeper/clickhouse-keeper.err.log + 1000M + 3 + + 0.0.0.0 + + 9181 + 1 + /var/lib/clickhouse/coordination/log + 
/var/lib/clickhouse/coordination/snapshots + + 10000 + 30000 + information + + + + 1 + clickhouse-keeper + 9234 + + + + diff --git a/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/00_create_db.sh b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/00_create_db.sh new file mode 100755 index 0000000..fc9eb35 --- /dev/null +++ b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/00_create_db.sh @@ -0,0 +1,5 @@ +#!/bin/bash +set -e +clickhouse client -n <<-EOSQL +CREATE DATABASE imdb; +EOSQL diff --git a/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/01_create_imdb_actors.sh b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/01_create_imdb_actors.sh new file mode 100755 index 0000000..7d2c157 --- /dev/null +++ b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/01_create_imdb_actors.sh @@ -0,0 +1,5 @@ +#!/bin/bash +set -e +clickhouse client -n <<-EOSQL +CREATE TABLE imdb.actors (id UInt32, first_name String, last_name String,gender FixedString(1)) ENGINE = MergeTree ORDER BY (id, first_name, last_name, gender); +EOSQL diff --git a/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/02_create_imdb_directors.sh b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/02_create_imdb_directors.sh new file mode 100755 index 0000000..3def5b9 --- /dev/null +++ b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/02_create_imdb_directors.sh @@ -0,0 +1,5 @@ +#!/bin/bash +set -e +clickhouse client -n <<-EOSQL +CREATE TABLE imdb.directors (id UInt32,first_name String,last_name String) ENGINE = MergeTree ORDER BY (id, 
first_name, last_name); +EOSQL diff --git a/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/03_create_imdb_genres.sh b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/03_create_imdb_genres.sh new file mode 100755 index 0000000..254bfa4 --- /dev/null +++ b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/03_create_imdb_genres.sh @@ -0,0 +1,5 @@ +#!/bin/bash +set -e +clickhouse client -n <<-EOSQL +CREATE TABLE imdb.genres (movie_id UInt32,genre String) ENGINE = MergeTree ORDER BY (movie_id, genre); +EOSQL diff --git a/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/04_create_imdb_movie_directors.sh b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/04_create_imdb_movie_directors.sh new file mode 100755 index 0000000..270ed85 --- /dev/null +++ b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/04_create_imdb_movie_directors.sh @@ -0,0 +1,5 @@ +#!/bin/bash +set -e +clickhouse client -n <<-EOSQL +CREATE TABLE imdb.movie_directors (director_id UInt32,movie_id UInt64) ENGINE = MergeTree ORDER BY (director_id, movie_id) +EOSQL diff --git a/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/05_create_imdb_movies.sh b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/05_create_imdb_movies.sh new file mode 100755 index 0000000..c5edab9 --- /dev/null +++ b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/05_create_imdb_movies.sh @@ -0,0 +1,5 @@ +#!/bin/bash +set -e +clickhouse client -n <<-EOSQL +CREATE TABLE imdb.movies (id UInt32 ,name String , year UInt32, rank Float32 DEFAULT 0) ENGINE = MergeTree ORDER 
BY (id, name, year); +EOSQL diff --git a/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/06_create_imdb_roles.sh b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/06_create_imdb_roles.sh new file mode 100755 index 0000000..c267d2a --- /dev/null +++ b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/06_create_imdb_roles.sh @@ -0,0 +1,5 @@ +#!/bin/bash +set -e +clickhouse client -n <<-EOSQL +CREATE TABLE imdb.roles (actor_id UInt32, movie_id UInt32, role String, created_at DateTime DEFAULT now()) ENGINE = MergeTree ORDER BY (actor_id, movie_id); +EOSQL diff --git a/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/07_insert.sh b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/07_insert.sh new file mode 100755 index 0000000..4e143d9 --- /dev/null +++ b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/docker-entrypoint-initdb.d/07_insert.sh @@ -0,0 +1,10 @@ +#!/bin/bash +set -e +clickhouse client -n <<-EOSQL +INSERT INTO imdb.actors SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/imdb/imdb_ijs_actors.tsv.gz', 'TSVWithNames'); +INSERT INTO imdb.directors SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/imdb/imdb_ijs_directors.tsv.gz', 'TSVWithNames'); +INSERT INTO imdb.genres SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/imdb/imdb_ijs_movies_genres.tsv.gz', 'TSVWithNames'); +INSERT INTO imdb.movie_directors SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/imdb/imdb_ijs_movies_directors.tsv.gz', 'TSVWithNames'); +INSERT INTO imdb.movies SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/imdb/imdb_ijs_movies.tsv.gz', 'TSVWithNames'); +INSERT INTO imdb.roles (actor_id, movie_id, role) SELECT actor_id,movie_id,role FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/imdb/imdb_ijs_roles.tsv.gz', 
'TSVWithNames'); +EOSQL diff --git a/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/etc/clickhouse-server/config.d/config.xml b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/etc/clickhouse-server/config.d/config.xml new file mode 100644 index 0000000..6914a23 --- /dev/null +++ b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/etc/clickhouse-server/config.d/config.xml @@ -0,0 +1,30 @@ + + + debug + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 3 + + ch-1S_1K + 0.0.0.0 + 8123 + 9000 + + + users.xml + + + /var/lib/clickhouse/access/ + + + + /clickhouse/task_queue/ddl + + + + clickhouse-keeper + 9181 + + + diff --git a/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/etc/clickhouse-server/users.d/users.xml b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/etc/clickhouse-server/users.d/users.xml new file mode 100644 index 0000000..0f32c64 --- /dev/null +++ b/docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/fs/volumes/clickhouse/etc/clickhouse-server/users.d/users.xml @@ -0,0 +1,37 @@ + + + + + 10000000000 + 0 + in_order + 1 + + + + + 1 + default + + ::/0 + + default + 1 + 1 + 1 + 1 + + + + + + 3600 + 0 + 0 + 0 + 0 + 0 + + + +