From b00cada6ee64f7f0464cf51cf6e41d4c7f88d74d Mon Sep 17 00:00:00 2001 From: Tanawat Panmongkol <40226657+tanawatpan@users.noreply.github.com> Date: Sun, 23 Apr 2023 15:02:03 +0000 Subject: [PATCH] change Apache drill to Distributed Mode --- k8s_drill.tf | 72 +++++++++++++++++++++++++++++++++++++++++++++++----- main.tf | 8 ++++++ 2 files changed, 74 insertions(+), 6 deletions(-) diff --git a/k8s_drill.tf b/k8s_drill.tf index 52ac94a..0c54ee1 100644 --- a/k8s_drill.tf +++ b/k8s_drill.tf @@ -39,14 +39,29 @@ resource "kubernetes_service" "drill_service" { } } -resource "kubernetes_deployment" "drill" { +resource "kubernetes_service" "drills" { metadata { - name = "drill" + name = "drills" namespace = kubernetes_namespace.drill.metadata.0.name } spec { - replicas = 1 + selector = { + app = "drill" + } + cluster_ip = "None" + } +} + +resource "kubernetes_stateful_set" "drill" { + metadata { + name = local.drill.name + namespace = kubernetes_namespace.drill.metadata.0.name + } + + spec { + service_name = "drills" + replicas = local.drill.replicas selector { match_labels = { @@ -63,17 +78,58 @@ resource "kubernetes_deployment" "drill" { spec { container { - name = "drill" + name = local.drill.name image = "${local.drill.image.name}:${local.drill.image.tag}" security_context { run_as_user = 0 } + env { + name = "ZOO_KEEPER_URL" + value = local.drill.zookeeper.package_url + } + env { + name = "ZOO_DATA_DIR" + value = local.drill.zookeeper.data_directory + } + env { + name = "ZOO_HOME" + value = local.drill.zookeeper.home + } + env { + name = "REPLICAS" + value = local.drill.replicas + } + command = ["/bin/bash", "-c"] - args = [<> $ZOO_HOME/conf/zoo.cfg + + for ((i=0;i<$REPLICAS;i++)); do + echo "server.$i=${local.drill.name}-$i.${kubernetes_service.drills.metadata.0.name}.${kubernetes_namespace.drill.metadata.0.name}.svc.cluster.local:2888:3888" >> $ZOO_HOME/conf/zoo.cfg + 
NODES+="${local.drill.name}-$i.${kubernetes_service.drills.metadata.0.name}.${kubernetes_namespace.drill.metadata.0.name}.svc.cluster.local:${local.drill.zookeeper.port}," + done + + POD_NAME=$(hostname) + mkdir -p $${ZOO_DATA_DIR} + echo $${POD_NAME##*-} > $${ZOO_DATA_DIR}/myid + + $ZOO_HOME/bin/zkServer.sh start + + NODES=$${NODES:0:-1} + sed -i "s/\(zk.connect:\).*/\1 \"$NODES\"/" $DRILL_HOME/conf/drill-override.conf + cat > $DRILL_HOME/conf/storage-plugins-override.conf <<-EOL "storage": { dfs: { @@ -244,6 +300,7 @@ resource "kubernetes_deployment" "drill" { } } EOL + cat > $DRILL_HOME/conf/core-site.xml <<-EOL @@ -256,11 +313,14 @@ resource "kubernetes_deployment" "drill" { EOL + rm -f $DRILL_HOME/jars/3rdparty/mongodb-driver-*.jar wget -P $DRILL_HOME/jars/3rdparty https://repo1.maven.org/maven2/org/mongodb/mongodb-driver-sync/${local.drill.mongodb_driver_version}/mongodb-driver-sync-${local.drill.mongodb_driver_version}.jar wget -P $DRILL_HOME/jars/3rdparty https://repo1.maven.org/maven2/org/mongodb/mongodb-driver-core/${local.drill.mongodb_driver_version}/mongodb-driver-core-${local.drill.mongodb_driver_version}.jar wget -P $DRILL_HOME/jars/3rdparty ${local.external_jars.gcs_connector} - $DRILL_HOME/bin/drill-embedded + + $DRILL_HOME/bin/drillbit.sh start + $DRILL_HOME/bin/sqlline EOT ] diff --git a/main.tf b/main.tf index 23155ec..66ac2c7 100644 --- a/main.tf +++ b/main.tf @@ -71,10 +71,18 @@ locals { } drill = { + name = "drill" + replicas = 2 image = { name = "apache/drill" tag = "latest-openjdk-11" } + zookeeper = { + port = 2181 + home = "/opt/zookeeper" + data_directory = "/var/lib/zookeeper" + package_url = "https://archive.apache.org/dist/zookeeper/zookeeper-3.7.1/apache-zookeeper-3.7.1-bin.tar.gz" + } mongodb_driver_version = "4.4.2" }