Skip to content

Commit

Permalink
by default add flink historyserver to deployment
Browse files Browse the repository at this point in the history
  • Loading branch information
ranchodeluxe committed Nov 24, 2023
1 parent 5e98329 commit b47cc34
Show file tree
Hide file tree
Showing 18 changed files with 442 additions and 11 deletions.
23 changes: 23 additions & 0 deletions helm-charts/flink-historyserver/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
24 changes: 24 additions & 0 deletions helm-charts/flink-historyserver/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
apiVersion: v2
name: flink-historyserver
description: A Helm chart for Kubernetes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.2

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "0.1.2"
62 changes: 62 additions & 0 deletions helm-charts/flink-historyserver/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
Returns .Values.nameOverride when set, otherwise the chart name, truncated
to 63 chars (Kubernetes DNS label limit) with any trailing "-" removed.
*/}}
{{- define "flink-historyserver.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
Precedence: .Values.fullnameOverride > .Release.Name (when it already contains
the chart name) > "<release>-<chart>".
*/}}
{{- define "flink-historyserver.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
"+" is not a legal label-value character, hence the replace with "_".
*/}}
{{- define "flink-historyserver.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
Standard helm.sh/app.kubernetes.io label set; includes the selector labels.
*/}}
{{- define "flink-historyserver.labels" -}}
helm.sh/chart: {{ include "flink-historyserver.chart" . }}
{{ include "flink-historyserver.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
Keep this set stable: it is meant for Deployment selectors, which are immutable.
*/}}
{{- define "flink-historyserver.selectorLabels" -}}
app.kubernetes.io/name: {{ include "flink-historyserver.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
NOTE(review): .Values.serviceAccount (create/name) is not defined in this
chart's values.yaml, so rendering a template that calls this helper will fail
until those values are added. templates/historyserver.yaml currently hardcodes
serviceAccountName "flink" instead — confirm intent.
*/}}
{{- define "flink-historyserver.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "flink-historyserver.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
9 changes: 9 additions & 0 deletions helm-charts/flink-historyserver/templates/configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# ConfigMap holding the Flink HistoryServer configuration files, mounted into
# the historyserver pod at /opt/flink/conf (see templates/historyserver.yaml).
apiVersion: v1
kind: ConfigMap
metadata:
  # NOTE: the Deployment references this ConfigMap by the literal name
  # "historyserver-config" — rename both together if release-scoping is needed.
  name: historyserver-config
  labels:
    {{- include "flink-historyserver.labels" . | nindent 4 }}
data:
  # Verbatim flink-conf.yaml contents supplied via values (flinkConfigMap).
  flink-conf.yaml: |
    {{- .Values.flinkConfigMap | nindent 4 }}
  # Verbatim log4j console logging properties supplied via values (log4jConfigMap).
  log4j-console.properties: |
    {{- .Values.log4jConfigMap | nindent 4 }}
37 changes: 37 additions & 0 deletions helm-charts/flink-historyserver/templates/efs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Storage plumbing for the Flink HistoryServer job archive, backed by AWS EFS:
# a StorageClass for the EFS CSI driver, a statically provisioned PV pointing
# at the filesystem, and the PVC the Deployment mounts.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: {{ .Release.Name }}-efs-flink-history
provisioner: efs.csi.aws.com
parameters:
  provisioningMode: efs-ap
  fileSystemId: {{ .Values.efsFileSystemId | quote }}
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: flink-historyserver-efs-pv
spec:
  capacity:
    # EFS is elastic; a capacity is required by the API but not enforced.
    storage: "1Mi"
  volumeMode: "Filesystem"
  accessModes:
    - "ReadWriteMany"
  # 'persistentVolumeReclaimPolicy' means EFS volumes must be manually cleaned up when testing is done
  persistentVolumeReclaimPolicy: Retain
  storageClassName: {{ .Release.Name }}-efs-flink-history
  csi:
    driver: "efs.csi.aws.com"
    volumeHandle: {{ .Values.efsFileSystemId | quote }}
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: flink-historyserver-efs-pvc
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: {{ .Release.Name }}-efs-flink-history
  # Pin the claim to the static PV above. Without this, the efs-ap
  # StorageClass may dynamically provision a fresh access point instead of
  # binding to the PV that points at the whole filesystem.
  volumeName: flink-historyserver-efs-pv
  resources:
    requests:
      storage: 1Mi
121 changes: 121 additions & 0 deletions helm-charts/flink-historyserver/templates/historyserver.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# Flink HistoryServer: serves the web UI (port 8082) over job archives that
# job managers write to the shared EFS volume (see flink_operator_config.tpl:
# jobmanager.archive.fs.dir / historyserver.archive.fs.dir = /opt/history/jobs).
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: historyserver
  name: historyserver
  # Install into the release namespace instead of hardcoding "default".
  namespace: {{ .Release.Namespace }}
spec:
  progressDeadlineSeconds: 150
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: historyserver
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: historyserver
    spec:
      containers:
        - args:
            - history-server
          command:
            - /docker-entrypoint.sh
          env:
            # Consumed by the Flink docker entrypoint to bind the REST endpoint.
            - name: _POD_IP_ADDRESS
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: status.podIP
          image: flink:{{- .Values.flinkVersion }}
          imagePullPolicy: IfNotPresent
          name: flink-main-container
          ports:
            - containerPort: 8082
              name: history
              protocol: TCP
          resources:
            limits:
              cpu: 300m
              memory: 1536Mi
            requests:
              cpu: 300m
              memory: 1536Mi
          startupProbe:
            # INT32_MAX: effectively "never give up" while waiting for the
            # REST API to come up; probed every second.
            failureThreshold: 2147483647
            httpGet:
              path: /config
              port: history
              scheme: HTTP
            periodSeconds: 1
            successThreshold: 1
            timeoutSeconds: 1
          volumeMounts:
            - mountPath: /opt/history/jobs
              name: efs-flink-history
            - mountPath: /opt/flink/conf
              name: flink-config-volume
      dnsPolicy: ClusterFirst
      initContainers:
        # Fresh EFS roots are owned by root; chown so the flink user (uid/gid
        # 9999 in the official image) can write archives. The ls is just a
        # visible confirmation in the init container logs.
        - command:
            - sh
            - -c
            - chown 9999:9999 /opt/history/jobs && ls -lhd /opt/history/jobs
          image: busybox:1.36.1
          imagePullPolicy: IfNotPresent
          name: efs-mount-ownership-fix
          resources: {}
          volumeMounts:
            - mountPath: /opt/history/jobs
              name: efs-flink-history
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext:
        fsGroup: 9999
      # NOTE: only serviceAccountName is needed; the legacy "serviceAccount"
      # field is deprecated and has been dropped.
      serviceAccountName: flink
      terminationGracePeriodSeconds: 30
      volumes:
        - name: efs-flink-history
          persistentVolumeClaim:
            claimName: flink-historyserver-efs-pvc
        - configMap:
            defaultMode: 420
            items:
              - key: log4j-console.properties
                path: log4j-console.properties
              - key: flink-conf.yaml
                path: flink-conf.yaml
            name: historyserver-config
          name: flink-config-volume
---
# ClusterIP service exposing the HistoryServer REST/UI port inside the cluster.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: historyserver
  name: historyserver-rest
  # Install into the release namespace instead of hardcoding "default".
  namespace: {{ .Release.Namespace }}
spec:
  ipFamilies:
    - IPv4
  ipFamilyPolicy: SingleStack
  ports:
    - name: history
      port: 8082
      protocol: TCP
      targetPort: 8082
  selector:
    app: historyserver
  sessionAffinity: None
  type: ClusterIP
Empty file.
19 changes: 19 additions & 0 deletions helm-charts/flink-historyserver/values.schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "type": "object",
  "required": ["efsFileSystemId", "log4jConfigMap", "flinkConfigMap"],
  "properties": {
    "efsFileSystemId": {
      "type": "string",
      "minLength": 1
    },
    "log4jConfigMap": {
      "type": "string",
      "minLength": 1
    },
    "flinkConfigMap": {
      "type": "string",
      "minLength": 1
    },
    "flinkVersion": {
      "type": "string",
      "minLength": 1
    }
  }
}
4 changes: 4 additions & 0 deletions helm-charts/flink-historyserver/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Id of the pre-created EFS filesystem backing job history storage
# (see templates/efs.yaml). Required — enforced by values.schema.json.
efsFileSystemId: ""
# Verbatim contents of flink-conf.yaml for the historyserver. Required.
flinkConfigMap: ""
# Verbatim contents of log4j-console.properties for the historyserver. Required.
log4jConfigMap: ""
# Flink image tag for the historyserver container (renders as flink:<flinkVersion>).
flinkVersion: "1.16"
1 change: 1 addition & 0 deletions terraform/aws/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.yaml
24 changes: 24 additions & 0 deletions terraform/aws/addons.tf
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,28 @@ resource "aws_eks_addon" "ebs_provisioner" {
depends_on = [
aws_iam_role_policy_attachment.ebs_provisioner
]
}

# EFS CSI Driver for HistoryServer
# IAM role the driver's service account assumes via the cluster OIDC provider
# (IRSA); cluster-name-prefixed to avoid cross-cluster collisions.
resource "aws_iam_role" "efs_provisioner" {
  name               = "${var.cluster_name}-eks-efs-provisioner"
  assume_role_policy = data.aws_iam_policy_document.assume_role_with_oidc.json
}

# AWS-managed policy granting the CSI driver the EFS API permissions it needs.
resource "aws_iam_role_policy_attachment" "efs_provisioner" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEFSCSIDriverPolicy"
  role       = aws_iam_role.efs_provisioner.name
}

resource "aws_eks_addon" "efs_provisioner" {
  cluster_name = aws_eks_cluster.cluster.name
  addon_name   = "aws-efs-csi-driver"
  # Fetch the most recent version for your current version of k8s
  # AWS_PROFILE=<your-profile> eksctl utils describe-addon-versions --kubernetes-version 1.27 -v0 | jq '.Addons[] | select(.AddonName == "aws-efs-csi-driver") | .AddonVersions[0]'
  addon_version               = "v1.7.0-eksbuild.1"
  resolve_conflicts_on_create = "OVERWRITE"
  service_account_role_arn    = aws_iam_role.efs_provisioner.arn
  # Ensure the policy is attached before the addon's pods try to use the role.
  depends_on = [
    aws_iam_role_policy_attachment.efs_provisioner
  ]
}
2 changes: 1 addition & 1 deletion terraform/aws/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ resource "aws_iam_openid_connect_provider" "cluster_oidc" {
module "cluster_autoscaler_irsa" {
source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"

role_name = "cluster_autoscaler"
role_name = "${var.cluster_name}_cluster_autoscaler"

attach_cluster_autoscaler_policy = true
cluster_autoscaler_cluster_ids = [
Expand Down
10 changes: 10 additions & 0 deletions terraform/aws/efs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# EFS filesystem holding archived Flink job history, shared between job
# managers (writers) and the historyserver (reader).
# NOTE(review): creation_token is hardcoded, so two clusters created from this
# module in the same account/region would collide on it; consider
# "${var.cluster_name}-flink-job-history" — but note changing the token forces
# the filesystem to be recreated (data loss), so flagged rather than changed.
resource "aws_efs_file_system" "job_history" {
  creation_token = "pforge-flink-job-history"
}

# One mount target per subnet so pods on any node can reach the filesystem.
# Reuses the EKS cluster security group, which already permits node traffic.
resource "aws_efs_mount_target" "job_history" {
  for_each        = toset(data.aws_subnets.default.ids)
  file_system_id  = aws_efs_file_system.job_history.id
  subnet_id       = each.value
  security_groups = [aws_eks_cluster.cluster.vpc_config[0].cluster_security_group_id]
}
7 changes: 7 additions & 0 deletions terraform/aws/flink_operator_config.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Flink operator default configuration: expose operator metrics to Prometheus
# on port 9999 via the Prometheus reporter factory.
kubernetes.operator.metrics.reporter.prom.factory.class: org.apache.flink.metrics.prometheus.PrometheusReporterFactory
kubernetes.operator.metrics.reporter.prom.factory.port: 9999
# Annotations stamped onto jobmanager pods so Prometheus scrapes them.
# NOTE(review): flink-conf map-typed options are normally written flat as
# "k1:v1,k2:v2" on one line — confirm this multi-line form parses as intended.
kubernetes.jobmanager.annotations:
prometheus.io/scrape: true
prometheus.io/port: 9999
# Completed jobs are archived here (the shared EFS mount) and read back by the
# historyserver deployed by helm-charts/flink-historyserver.
jobmanager.archive.fs.dir: /opt/history/jobs
historyserver.archive.fs.dir: /opt/history/jobs
Loading

0 comments on commit b47cc34

Please sign in to comment.