Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
nellicus committed Feb 14, 2024
2 parents 4a383cd + 39d2f14 commit b98e3ef
Show file tree
Hide file tree
Showing 15 changed files with 181 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ A collection of data required to back our own ClickHouse official [Blogs](clickh
A [list](./docker-compose-recipes/README.md) of ClickHouse recipes using docker compose:

- ClickHouse single node with Keeper
- ClickHouse single node with Keeper and IMDB dataset
- ClickHouse and Grafana
- ClickHouse and MSSQL Server 2022
- ClickHouse and MinIO S3
Expand Down
1 change: 1 addition & 0 deletions docker-compose-recipes/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
A list of ClickHouse docker compose recipes

- [ClickHouse single node with Keeper](./recipes/ch-1S_1K/README.md)
- [ClickHouse single node with Keeper and IMDB dataset](./recipes/ch-1S_1K_IMDB_dataset/README.md)
- [ClickHouse and Grafana](./recipes/ch-and-grafana/README.md)
- [ClickHouse and MSSQL Server 2022](./recipes/ch-and-mssql/README.md)
- [ClickHouse and MinIO S3](./recipes/ch-and-minio-S3/README.md)
Expand Down
15 changes: 15 additions & 0 deletions docker-compose-recipes/recipes/ch-1S_1K_IMDB_dataset/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# ClickHouse ch-1S_1K_IMDB_dataset

Single-node ClickHouse instance backed by one ClickHouse Keeper node and preloaded with the IMDB dataset.

By default, the ClickHouse version used is `latest` and the ClickHouse Keeper
version is `latest-alpine`. You can pin specific versions by setting environment
variables before running `docker compose up`.

This recipe simply automates the [IMDB dataset](https://en.wikipedia.org/wiki/IMDb) loading illustrated [here](https://clickhouse.com/docs/en/integrations/dbt#prepare-clickhouse).

```bash
export CHVER=23.4
export CHKVER=23.4-alpine
docker compose up
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Single-node ClickHouse server plus one ClickHouse Keeper instance.
# Image tags default to `latest` (server) and `latest-alpine` (keeper);
# export CHVER / CHKVER before `docker compose up` to pin other versions.
version: '3.8'
services:
  clickhouse:
    image: 'clickhouse/clickhouse-server:${CHVER:-latest}'
    user: '101:101'
    container_name: clickhouse
    hostname: clickhouse
    volumes:
      # Server and user configuration overrides.
      - ${PWD}/fs/volumes/clickhouse/etc/clickhouse-server/config.d/config.xml:/etc/clickhouse-server/config.d/config.xml
      - ${PWD}/fs/volumes/clickhouse/etc/clickhouse-server/users.d/users.xml:/etc/clickhouse-server/users.d/users.xml
      # Init scripts mounted into the image's init directory.
      - ${PWD}/fs/volumes/clickhouse/docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d
    ports:
      # Published on the loopback interface only.
      - '127.0.0.1:8123:8123'
      - '127.0.0.1:9000:9000'
    depends_on:
      - clickhouse-keeper
  clickhouse-keeper:
    image: 'clickhouse/clickhouse-keeper:${CHKVER:-latest-alpine}'
    user: '101:101'
    container_name: clickhouse-keeper
    hostname: clickhouse-keeper
    volumes:
      - ${PWD}/fs/volumes/clickhouse-keeper/etc/clickhouse-keeper/keeper_config.xml:/etc/clickhouse-keeper/keeper_config.xml
    ports:
      # Published on the loopback interface only.
      - '127.0.0.1:9181:9181'
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<!-- ClickHouse Keeper configuration: a single Keeper server (id 1). -->
<clickhouse replace="true">
    <logger>
        <level>information</level>
        <log>/var/log/clickhouse-keeper/clickhouse-keeper.log</log>
        <errorlog>/var/log/clickhouse-keeper/clickhouse-keeper.err.log</errorlog>
        <size>1000M</size>
        <count>3</count>
    </logger>
    <listen_host>0.0.0.0</listen_host>
    <keeper_server>
        <!-- Client-facing port; matches the port published in the compose file. -->
        <tcp_port>9181</tcp_port>
        <server_id>1</server_id>
        <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
        <coordination_settings>
            <operation_timeout_ms>10000</operation_timeout_ms>
            <session_timeout_ms>30000</session_timeout_ms>
            <raft_logs_level>information</raft_logs_level>
        </coordination_settings>
        <!-- One-member Raft configuration; its id equals server_id above. -->
        <raft_configuration>
            <server>
                <id>1</id>
                <hostname>clickhouse-keeper</hostname>
                <port>9234</port>
            </server>
        </raft_configuration>
    </keeper_server>
</clickhouse>
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Creates the `imdb` database through the local clickhouse client.
# `set -e` makes the script exit non-zero if the client call fails.
set -e

clickhouse client -n <<-EOSQL
    CREATE DATABASE imdb;
EOSQL
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Creates the MergeTree table `imdb.actors`, sorted by all four of its columns.
# `set -e` makes the script exit non-zero if the client call fails.
set -e

clickhouse client -n <<-EOSQL
    CREATE TABLE imdb.actors
    (
        id         UInt32,
        first_name String,
        last_name  String,
        gender     FixedString(1)
    )
    ENGINE = MergeTree
    ORDER BY (id, first_name, last_name, gender);
EOSQL
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Creates the MergeTree table `imdb.directors`, sorted by (id, first_name, last_name).
# `set -e` makes the script exit non-zero if the client call fails.
set -e

clickhouse client -n <<-EOSQL
    CREATE TABLE imdb.directors
    (
        id         UInt32,
        first_name String,
        last_name  String
    )
    ENGINE = MergeTree
    ORDER BY (id, first_name, last_name);
EOSQL
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Creates the MergeTree table `imdb.genres`, sorted by (movie_id, genre).
# `set -e` makes the script exit non-zero if the client call fails.
set -e

clickhouse client -n <<-EOSQL
    CREATE TABLE imdb.genres
    (
        movie_id UInt32,
        genre    String
    )
    ENGINE = MergeTree
    ORDER BY (movie_id, genre);
EOSQL
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Creates the MergeTree table `imdb.movie_directors`, sorted by
# (director_id, movie_id).
# NOTE(review): movie_id is UInt64 here, while the movie_id columns of
# imdb.genres and imdb.roles and imdb.movies.id are UInt32. Kept unchanged;
# confirm whether the wider type is intended.
# `set -e` makes the script exit non-zero if the client call fails.
set -e

# The statement is terminated with ';' like every other script in this recipe,
# so appending another statement later cannot silently merge the two.
clickhouse client -n <<-EOSQL
CREATE TABLE imdb.movie_directors (director_id UInt32,movie_id UInt64) ENGINE = MergeTree ORDER BY (director_id, movie_id);
EOSQL
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Creates the MergeTree table `imdb.movies`, sorted by (id, name, year).
# The `rank` column defaults to 0 when no value is supplied.
# `set -e` makes the script exit non-zero if the client call fails.
set -e

clickhouse client -n <<-EOSQL
    CREATE TABLE imdb.movies
    (
        id   UInt32,
        name String,
        year UInt32,
        rank Float32 DEFAULT 0
    )
    ENGINE = MergeTree
    ORDER BY (id, name, year);
EOSQL
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Creates the MergeTree table `imdb.roles`, sorted by (actor_id, movie_id).
# `created_at` defaults to now() when an insert does not supply it.
# `set -e` makes the script exit non-zero if the client call fails.
set -e

clickhouse client -n <<-EOSQL
    CREATE TABLE imdb.roles
    (
        actor_id   UInt32,
        movie_id   UInt32,
        role       String,
        created_at DateTime DEFAULT now()
    )
    ENGINE = MergeTree
    ORDER BY (actor_id, movie_id);
EOSQL
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Loads the IMDB dataset into the imdb.* tables from gzip-compressed TSV files
# (with a header row) hosted in the public datasets-documentation S3 bucket.
# `set -e` makes the script exit non-zero if the client call fails.
set -e

# imdb.movie_directors is loaded too, so the table is not left empty.
# imdb.roles names its columns explicitly so that created_at takes its
# DEFAULT now() value instead of being read from the file.
clickhouse client -n <<-EOSQL
INSERT INTO imdb.actors SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/imdb/imdb_ijs_actors.tsv.gz', 'TSVWithNames');
INSERT INTO imdb.directors SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/imdb/imdb_ijs_directors.tsv.gz', 'TSVWithNames');
INSERT INTO imdb.genres SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/imdb/imdb_ijs_movies_genres.tsv.gz', 'TSVWithNames');
INSERT INTO imdb.movie_directors SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/imdb/imdb_ijs_movies_directors.tsv.gz', 'TSVWithNames');
INSERT INTO imdb.movies SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/imdb/imdb_ijs_movies.tsv.gz', 'TSVWithNames');
INSERT INTO imdb.roles (actor_id, movie_id, role) SELECT actor_id,movie_id,role FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/imdb/imdb_ijs_roles.tsv.gz', 'TSVWithNames');
EOSQL
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<!-- ClickHouse server configuration for the single-node recipe. -->
<clickhouse replace="true">
    <logger>
        <level>debug</level>
        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
        <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
        <size>1000M</size>
        <count>3</count>
    </logger>
    <!-- NOTE(review): the display name matches the base ch-1S_1K recipe rather
         than this IMDB variant; confirm whether that is intended. -->
    <display_name>ch-1S_1K</display_name>
    <listen_host>0.0.0.0</listen_host>
    <!-- HTTP and native TCP ports; both are published by the compose file. -->
    <http_port>8123</http_port>
    <tcp_port>9000</tcp_port>
    <user_directories>
        <users_xml>
            <path>users.xml</path>
        </users_xml>
        <local_directory>
            <path>/var/lib/clickhouse/access/</path>
        </local_directory>
    </user_directories>
    <distributed_ddl>
        <path>/clickhouse/task_queue/ddl</path>
    </distributed_ddl>
    <!-- Coordination endpoint: the clickhouse-keeper service on port 9181. -->
    <zookeeper>
        <node>
            <host>clickhouse-keeper</host>
            <port>9181</port>
        </node>
    </zookeeper>
</clickhouse>
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<?xml version="1.0"?>
<!-- Profiles, users and quotas for the ClickHouse server. -->
<clickhouse replace="true">
    <profiles>
        <default>
            <max_memory_usage>10000000000</max_memory_usage>
            <use_uncompressed_cache>0</use_uncompressed_cache>
            <load_balancing>in_order</load_balancing>
            <log_queries>1</log_queries>
        </default>
    </profiles>
    <users>
        <default>
            <!-- Declared once; the original listed this element twice with
                 the same value. -->
            <access_management>1</access_management>
            <profile>default</profile>
            <!-- ::/0 places no restriction on the client address. -->
            <networks>
                <ip>::/0</ip>
            </networks>
            <quota>default</quota>
            <named_collection_control>1</named_collection_control>
            <show_named_collections>1</show_named_collections>
            <show_named_collections_secrets>1</show_named_collections_secrets>
        </default>
    </users>
    <quotas>
        <default>
            <!-- One-hour interval with every limit set to 0. -->
            <interval>
                <duration>3600</duration>
                <queries>0</queries>
                <errors>0</errors>
                <result_rows>0</result_rows>
                <read_rows>0</read_rows>
                <execution_time>0</execution_time>
            </interval>
        </default>
    </quotas>
</clickhouse>

0 comments on commit b98e3ef

Please sign in to comment.