diff --git a/kubernetes/Readme.md b/kubernetes/Readme.md new file mode 100644 index 0000000..faf1567 --- /dev/null +++ b/kubernetes/Readme.md @@ -0,0 +1,75 @@ + # Proof of concept for Ambari on Kubernetes + +I was interested in whether it is generally possible to run Ambari on top of Kubernetes. +Whether this is a good idea is a separate discussion; I just wanted to see if it is +theoretically possible. + +## Installing + +Run in the root folder of this project: +``` + kubectl create -f kubernetes/ +``` + +This brings up a cluster with 1 Ambari server and 5 Ambari agent nodes. + +``` +kubectl get pods --namespace=ambari +NAME READY STATUS RESTARTS AGE +amb-agent-btj80 1/1 Running 0 13h +amb-agent-cn5qe 1/1 Running 0 13h +amb-agent-fwfre 1/1 Running 0 13h +amb-agent-sk80u 1/1 Running 0 13h +amb-agent-uxaec 1/1 Running 0 13h +ambari-server-leqn7 1/1 Running 0 13h +``` + +Get a terminal in the master node (replace `ambari-server-leqn7` with the name of your ambari-server pod): + +``` +kubectl exec -ti --namespace=ambari ambari-server-leqn7 /bin/bash +``` + +and paste: + +``` +export BLUEPRINT=multi-node-hdfs-yarn +export EXPECTED_HOST_COUNT=5 +/tmp/install-cluster.sh +``` + +### Access the Ambari web interface + +In order to access the web interface you have to modify the file ambari-web-service-sc.yml +to provide an external load balancer / URL to access, according to your Kubernetes cluster. + + +## Issues + +I had to modify the setting dfs.namenode.datanode.registration.ip-hostname-check to false in hdfs-site.xml. +I used the Ambari web UI to do this and restarted the affected services. +Without this, the datanodes were not starting. + +It would be nice if I could use the Ambari shell to set this property before starting the cluster. + +The next issue is that there are a lot of other external services / web UIs running +on different nodes in the cluster which Kubernetes is not aware of. So I could +not use the Kubernetes-native means to expose them to the internet. 
One solution +might be to do a port mapping for every possible service port on every agent pod, +but to do so I need a list of the possible ports that have to be accessible from +the internet. + +## TODO + +* Provide something similar to docker functions in ambari-functions for Kubernetes. +* Find a solution for exposing the different web interfaces exposed by services in the cluster. +* This is a PoC that shows that it is possible to run Ambari on Kubernetes. +Still, the question is whether it makes sense. For an on-demand Hadoop infrastructure +it is probably OK. To run this as a long-lived cluster it would probably be necessary +to dedicate nodes in the cluster to the different services. For example, containers +that only do MapReduce and kind of "know" their purpose. These could maybe use +the Ambari REST API to register themselves into the cluster, and also deregister +if they are stopped. +This way you could use Kubernetes autoscaling groups for these containers and it +would be easier to use Kubernetes services to discover Hadoop services. But I don't +know if the REST API / Ambari shell is powerful enough to do this. 
diff --git a/kubernetes/ambari-agent-config.yml b/kubernetes/ambari-agent-config.yml
new file mode 100644
index 0000000..4d7f08a
--- /dev/null
+++ b/kubernetes/ambari-agent-config.yml
@@ -0,0 +1,18 @@
+---
+# systemd drop-ins for ambari-agent.service; ambari-agent.yml mounts this
+# ConfigMap at /etc/systemd/system/ambari-agent.service.d.
+kind: ConfigMap
+apiVersion: v1
+metadata:
+  name: ambari-agent
+  namespace: ambari
+data:
+  10-noconusul.conf: |
+    [Service]
+    Environment="USE_CONSUL_DNS=false"
+  20-ambari-server.conf: |
+    [Service]
+    Environment="AMBARI_SERVER_ADDR=ambari-server.ambari.svc.cluster.local"
+  30-ambari-internal-hostname.conf: |
+    [Service]
+    ExecStartPre=/bin/bash /scripts/write-custom-hostname.sh
diff --git a/kubernetes/ambari-agent-custom-scripts.yml b/kubernetes/ambari-agent-custom-scripts.yml
new file mode 100644
index 0000000..b6122d5
--- /dev/null
+++ b/kubernetes/ambari-agent-custom-scripts.yml
@@ -0,0 +1,23 @@
+---
+# Script run as ExecStartPre of ambari-agent.service (see the
+# 30-ambari-internal-hostname.conf drop-in); mounted at /scripts.
+kind: ConfigMap
+apiVersion: v1
+metadata:
+  name: ambari-agent-scripts
+  namespace: ambari
+data:
+  write-custom-hostname.sh: |
+    #!/bin/bash
+    # Generate a helper that prints <pod-ip-with-dashes>.ambari.pod.cluster.local
+    # for the pod's 10.x.x.x/24 address.
+    echo -e "#!/bin/bash\necho \$( ip addr show | grep \" inet 10\" | awk '{ print \$2}' | tr . - | sed -e s#/24##).ambari.pod.cluster.local" > /etc/ambari-agent/conf/internal-hostname.sh
+    # The helper is executed directly below, so it must be executable.
+    chmod +x /etc/ambari-agent/conf/internal-hostname.sh
+    internal_hostname="$(/etc/ambari-agent/conf/internal-hostname.sh)"
+    # Read /etc/hosts completely before rewriting it: redirecting a pipeline
+    # back into its own input file can truncate the file before it is read.
+    updated_hosts="$(sed -e "s/$HOSTNAME/$internal_hostname/" /etc/hosts)"
+    printf '%s\n' "$updated_hosts" > /etc/hosts
+    printf '%s\n' "$internal_hostname" > /etc/hostname
+    hostname "$internal_hostname"
diff --git a/kubernetes/ambari-agent.yml b/kubernetes/ambari-agent.yml
new file mode 100644
index 0000000..8c6747a
--- /dev/null
+++ b/kubernetes/ambari-agent.yml
@@ -0,0 +1,46 @@
+---
+# Ambari agent pods. install-cluster.sh sets EXPECTED_HOST_COUNT=5, which
+# presumably has to match the replica count below.
+apiVersion: v1
+kind: ReplicationController
+metadata:
+  name: amb-agent
+  namespace: ambari
+  labels:
+    name: amb-agent
+spec:
+  replicas: 5
+  selector:
+    name: amb-agent
+  template:
+    metadata:
+      labels:
+        name: amb-agent
+    spec:
+      containers:
+        - name: amb-agent
+          image: hortonworks/ambari-agent:2.2.1-v20
+          securityContext:
+            privileged: true
+          resources:
+            requests:
+              memory: "16G"
+              cpu: "2000m"
+            limits:
+              memory: "20G"
+              cpu: "3000m"
+          volumeMounts:
+            - name: ambari-conf
+              mountPath: /etc/systemd/system/ambari-agent.service.d
+            - name: ambari-scripts
+              mountPath: /scripts
+          env:
+            - name: AMBARI_SERVER
+              value: ambari-server.ambari.svc.cluster.local
+      volumes:
+        - name: ambari-conf
+          configMap:
+            name: ambari-agent
+        - name: ambari-scripts
+          configMap:
+            name: ambari-agent-scripts
diff --git a/kubernetes/ambari-server-config.yml b/kubernetes/ambari-server-config.yml
new file mode 100644
index 0000000..bef8750
--- /dev/null
+++ b/kubernetes/ambari-server-config.yml
@@ -0,0 +1,15 @@
+---
+# systemd drop-ins for ambari-server.service; ambari-server.yml mounts this
+# ConfigMap at /etc/systemd/system/ambari-server.service.d.
+kind: ConfigMap
+apiVersion: v1
+metadata:
+  name: ambari-server
+  namespace: ambari
+data:
+  10-noconusul.conf: |
+    [Service]
+    Environment="USE_CONSUL_DNS=false"
+  10-hostname.conf: |
+    [Service]
+    ExecStartPre=/bin/bash /scripts/write-custom-hostname.sh
diff --git a/kubernetes/ambari-server-custom-scripts.yml b/kubernetes/ambari-server-custom-scripts.yml
new file mode 100644
index 0000000..06b865c
--- /dev/null
+++ b/kubernetes/ambari-server-custom-scripts.yml
@@ -0,0 +1,23 @@
+---
+# Script run as ExecStartPre of ambari-server.service (see the
+# 10-hostname.conf drop-in); mounted at /scripts.
+kind: ConfigMap
+apiVersion: v1
+metadata:
+  name: ambari-server-scripts
+  namespace: ambari
+data:
+  write-custom-hostname.sh: |
+    #!/bin/bash
+    # Generate a helper that prints <pod-ip-with-dashes>.ambari.pod.cluster.local
+    # for the pod's 10.x.x.x/24 address.
+    echo -e "#!/bin/bash\necho \$( ip addr show | grep \"inet 10\" | awk '{ print \$2}' | tr . - | sed -e s#/24##).ambari.pod.cluster.local" > /etc/ambari-server/conf/internal-hostname.sh
+    # The helper is executed directly below, so it must be executable.
+    chmod +x /etc/ambari-server/conf/internal-hostname.sh
+    internal_hostname="$(/etc/ambari-server/conf/internal-hostname.sh)"
+    # Read /etc/hosts completely before rewriting it: redirecting a pipeline
+    # back into its own input file can truncate the file before it is read.
+    updated_hosts="$(sed -e "s/$HOSTNAME/$internal_hostname/" /etc/hosts)"
+    printf '%s\n' "$updated_hosts" > /etc/hosts
+    printf '%s\n' "$internal_hostname" > /etc/hostname
+    hostname "$internal_hostname"
diff --git a/kubernetes/ambari-server.yml b/kubernetes/ambari-server.yml
new file mode 100644
index 0000000..5f8359d
--- /dev/null
+++ b/kubernetes/ambari-server.yml
@@ -0,0 +1,45 @@
+---
+# Single Ambari server pod, selected by the ambari-server and
+# ambari-server-web-lb Services via the name=ambari-server label.
+apiVersion: v1
+kind: ReplicationController
+metadata:
+  name: ambari-server
+  namespace: ambari
+  labels:
+    name: ambari-server
+spec:
+  replicas: 1
+  selector:
+    name: ambari-server
+  template:
+    metadata:
+      labels:
+        name: ambari-server
+    spec:
+      containers:
+        - name: ambari-server
+          image: hortonworks/ambari-server:2.2.1-v20
+          securityContext:
+            privileged: true
+          volumeMounts:
+            - name: ambari-conf
+              mountPath: /etc/systemd/system/ambari-server.service.d
+            - name: ambari-scripts
+              mountPath: /scripts
+          env:
+            - name: USE_CONSUL_DNS
+              value: "false"
+            - name: AMBARI_SERVER
+              value: ambari-server.ambari.svc.cluster.local
+            - name: AMBARI_HOST
+              value: ambari-server.ambari.svc.cluster.local
+          ports:
+            - containerPort: 8080
+      volumes:
+        - name: ambari-conf
+          configMap:
+            name: ambari-server
+        - name: ambari-scripts
+          configMap:
+            name: ambari-server-scripts
diff --git a/kubernetes/ambari-service-sc.yml b/kubernetes/ambari-service-sc.yml
new file mode 100644
index 0000000..09396df
--- /dev/null
+++ b/kubernetes/ambari-service-sc.yml
@@ -0,0 +1,27 @@
+---
+# Cluster-internal Service giving the Ambari server the stable DNS name
+# ambari-server.ambari.svc.cluster.local that the agents are configured with.
+apiVersion: v1
+kind: Service
+metadata:
+  name: ambari-server
+  namespace: ambari
+  labels:
+    name: ambari-server
+spec:
+  ports:
+    # the port that this service should serve on
+    - port: 8080
+      name: web
+      targetPort: 8080
+      protocol: TCP
+    - port: 8440
+      name: api
+      targetPort: 8440
+      protocol: TCP
+    - port: 8441
+      name: api2
+      targetPort: 8441
+      protocol: TCP
+  selector:
+    name: ambari-server
diff --git a/kubernetes/ambari-web-service-sc.yml b/kubernetes/ambari-web-service-sc.yml
new file mode 100644
index 0000000..83b7198
--- /dev/null
+++ b/kubernetes/ambari-web-service-sc.yml
@@ -0,0 +1,19 @@
+---
+# Exposes the Ambari web UI (port 8080) outside the cluster.
+apiVersion: v1
+kind: Service
+metadata:
+  name: ambari-server-web-lb
+  namespace: ambari
+  labels:
+    name: ambari-server
+spec:
+  # Service type values are case-sensitive: "LoadBalancer", not "loadbalancer".
+  type: LoadBalancer
+  ports:
+    # the port that this service should serve on
+    - port: 8080
+      targetPort: 8080
+      protocol: TCP
+  selector:
+    name: ambari-server
diff --git a/kubernetes/install-cluster.sh b/kubernetes/install-cluster.sh
new file mode 100644
index 0000000..861db41
--- /dev/null
+++ b/kubernetes/install-cluster.sh
@@ -0,0 +1,4 @@
+# Paste into a shell on the ambari-server pod (see Readme.md).
+export BLUEPRINT=multi-node-hdfs-yarn
+export EXPECTED_HOST_COUNT=5
+/tmp/install-cluster.sh