Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hive metastore (docker + helm chart) #1

Merged
merged 3 commits into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ jobs:
id: build-and-push
uses: docker/[email protected]
with:
context: .
context: ./docker/
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
Expand Down
1 change: 1 addition & 0 deletions docker/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
README.md
54 changes: 54 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
FROM openjdk:16-slim

ARG HADOOP_VERSION=3.3.0
ARG HIVE_METASTORE_VERSION=3.0.0
ARG POSTGRES_CONNECTOR_VERSION=42.2.18

WORKDIR /app
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# hadolint ignore=DL3008
RUN echo "Install OS dependencies" && \
build_deps="curl" && \
apt-get update -y && \
apt-get install -y $build_deps net-tools gettext-base --no-install-recommends
RUN echo "Download and extract the Hadoop binary package" && \
curl https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz \
| tar xvz -C /opt/ && \
ln -s /opt/hadoop-$HADOOP_VERSION /opt/hadoop && \
rm -r /opt/hadoop/share/doc
RUN echo "Download and install the standalone metastore binary" && \
curl https://downloads.apache.org/hive/hive-standalone-metastore-$HIVE_METASTORE_VERSION/hive-standalone-metastore-$HIVE_METASTORE_VERSION-bin.tar.gz \
| tar xvz -C /opt/ && \
ln -s /opt/apache-hive-metastore-$HIVE_METASTORE_VERSION-bin /opt/hive-metastore
RUN echo "Fix 'java.lang.NoSuchMethodError: com.google.common.base.Preconditions.checkArgument'" && \
echo "Keep this until this lands: https://issues.apache.org/jira/browse/HIVE-22915" && \
rm /opt/apache-hive-metastore-$HIVE_METASTORE_VERSION-bin/lib/guava-19.0.jar && \
cp /opt/hadoop-$HADOOP_VERSION/share/hadoop/hdfs/lib/guava-27.0-jre.jar /opt/apache-hive-metastore-$HIVE_METASTORE_VERSION-bin/lib/
RUN echo "Download and install the database connector" && \
curl -L https://jdbc.postgresql.org/download/postgresql-$POSTGRES_CONNECTOR_VERSION.jar --output /opt/postgresql-$POSTGRES_CONNECTOR_VERSION.jar && \
ln -s /opt/postgresql-$POSTGRES_CONNECTOR_VERSION.jar /opt/hadoop/share/hadoop/common/lib/ && \
ln -s /opt/postgresql-$POSTGRES_CONNECTOR_VERSION.jar /opt/hive-metastore/lib/
RUN echo "Purge build artifacts" && \
apt-get purge -y --auto-remove $build_deps && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

COPY hive-site.xml hive-site_TEMPLATE.xml
COPY metastore-site.xml metastore-site_TEMPLATE.xml
COPY --chmod=755 run.sh run.sh

ENV HADOOP_HOME="/opt/hadoop"
ENV PATH="/opt/hadoop/bin:${PATH}"
ENV METASTORE_PORT=9083
ENV DATABASE_DRIVER=org.postgresql.Driver
ENV DATABASE_TYPE=postgres
ENV DATABASE_TYPE_JDBC=postgresql
# ENV DATABASE_PORT=5432
# ENV DATABASE_USER=admin
# ENV DATABASE_PASSWORD=pass
# ENV DATABASE_DB=metastore
# ENV DATABASE_HOST=metastore-db

CMD [ "./run.sh" ]
HEALTHCHECK CMD [ "sh", "-c", "netstat -ln | grep 9083" ]
11 changes: 11 additions & 0 deletions docker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Hive metastore standalone image
Hive metastore requires metastore db. This image requires Postgres DB.
## Required parameters
Hive metastore requires postgres conneciton parameters.
| Param name | Example |
| -----------|-------- |
|DATABASE_PORT|5432|
|DATABASE_USER|admin|
|DATABASE_PASSWORD|pass|
|DATABASE_DB|mestastore|
|DATABASE_HOST|pghost|
18 changes: 18 additions & 0 deletions docker/hive-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<configuration>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>${DATABASE_DRIVER}</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:${DATABASE_TYPE_JDBC}://${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB}</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>${DATABASE_USER}</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>${DATABASE_PASSWORD}</value>
</property>
</configuration>
30 changes: 30 additions & 0 deletions docker/metastore-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<configuration>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>${DATABASE_DRIVER}</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:${DATABASE_TYPE_JDBC}://${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB}</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>${DATABASE_USER}</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>${DATABASE_PASSWORD}</value>
</property>
<property>
<name>metastore.task.threads.always</name>
<value>org.apache.hadoop.hive.metastore.events.EventCleanerTask</value>
</property>
<property>
<name>metastore.expression.proxy</name>
<value>org.apache.hadoop.hive.metastore.DefaultPartitionExpressionProxy</value>
</property>
<property>
<name>metastore.thrift.port</name>
<value>9083</value>
</property>
</configuration>
19 changes: 19 additions & 0 deletions docker/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash

set -euxo pipefail

envsubst < hive-site_TEMPLATE.xml > /opt/hadoop/etc/hadoop/hive-site.xml
envsubst < metastore-site_TEMPLATE.xml > /opt/hive-metastore/conf/metastore-site.xml

cat /opt/hadoop/etc/hadoop/hive-site.xml
cat /opt/hive-metastore/conf/metastore-site.xml

if /opt/hive-metastore/bin/schematool -dbType "$DATABASE_TYPE" -validate | grep 'Done with metastore validation' | grep '[SUCCESS]'; then
echo 'Metastore already initialized'
else
echo 'Metastore - initializing schema'
/opt/hive-metastore/bin/schematool --verbose -dbType "$DATABASE_TYPE" -initSchema
echo 'Metastore - schema initialized'
fi

/opt/hive-metastore/bin/start-metastore
23 changes: 23 additions & 0 deletions hive-metastore/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
24 changes: 24 additions & 0 deletions hive-metastore/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
apiVersion: v2
name: hive-metastore
description: A Helm chart for Kubernetes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"
22 changes: 22 additions & 0 deletions hive-metastore/templates/NOTES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
{{- range .paths }}
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
{{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "hive-metastore.fullname" . }})
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
echo thrift://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "hive-metastore.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "hive-metastore.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "hive-metastore.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}
62 changes: 62 additions & 0 deletions hive-metastore/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "hive-metastore.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "hive-metastore.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "hive-metastore.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "hive-metastore.labels" -}}
helm.sh/chart: {{ include "hive-metastore.chart" . }}
{{ include "hive-metastore.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "hive-metastore.selectorLabels" -}}
app.kubernetes.io/name: {{ include "hive-metastore.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
*/}}
{{- define "hive-metastore.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "hive-metastore.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
85 changes: 85 additions & 0 deletions hive-metastore/templates/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "hive-metastore.fullname" . }}
labels:
{{- include "hive-metastore.labels" . | nindent 4 }}
spec:
{{- if not .Values.autoscaling.enabled }}
replicas: {{ .Values.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "hive-metastore.selectorLabels" . | nindent 6 }}
template:
metadata:
{{- with .Values.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "hive-metastore.labels" . | nindent 8 }}
{{- with .Values.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "hive-metastore.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
containers:
- name: {{ .Chart.Name }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
env:
- name: DATABASE_HOST
value: "{{ .Values.postgres.host }}"
- name: DATABASE_PORT
value: "{{ .Values.postgres.port | default 5432 }}"
- name: DATABASE_DB
value: "{{ .Values.postgres.db }}"
- name: DATABASE_USER
valueFrom:
secretKeyRef:
key: username
name: {{ .Release.Name }}-pgauth
- name: DATABASE_PASSWORD
valueFrom:
secretKeyRef:
key: password
name: {{ .Release.Name }}-pgauth
ports:
- name: thrift
containerPort: {{ .Values.service.port }}
protocol: TCP
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 12 }}
readinessProbe:
{{- toYaml .Values.readinessProbe | nindent 12 }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
{{- with .Values.volumeMounts }}
volumeMounts:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.volumes }}
volumes:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
Loading
Loading