[refinery] Add option to run refinery as a statefulset
Using a statefulset allows giving the pods a stable network identity.
Setting `setHostnameAsFQDN` means that this stable network identity is
what `os.Hostname` reports. Together, they allow using the hostname in
the peer list, so that the peer list remains stable even as pods are
rescheduled. This improves trace routing stability during refinery
upgrades and Kubernetes cluster operations (upgrades / scale-downs),
and even makes it possible to run refinery with an affinity preference
for spot instances.
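
For example — a sketch, assuming a release named `refinery` with three
replicas in the `default` namespace (none of these names come from this
commit):

  # values override to opt in:
  mode: statefulset

  # Each pod then gets a stable, resolvable identity via the governing
  # "-cluster" headless service, e.g.:
  #   refinery-0.refinery-cluster.default.svc.cluster.local
  # and, with setHostnameAsFQDN, that FQDN is what os.Hostname() reports,
  # so it is what ends up in the peer list.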
kamalmarhubi committed Jun 6, 2023
1 parent b7a3ac6 commit 41e6685
Showing 6 changed files with 204 additions and 116 deletions.
118 changes: 118 additions & 0 deletions charts/refinery/templates/_pod.tpl
@@ -0,0 +1,118 @@
{{- define "refinery.pod" -}}
metadata:
annotations:
checksum/config: {{ include (print $.Template.BasePath "/configmap-config.yaml") . | sha256sum }}
{{- with .Values.podAnnotations }}
{{- toYaml . | nindent 4 }}
{{- end }}
{{- if eq .Values.config.Metrics "prometheus" }}
prometheus.io/port: "9090"
prometheus.io/scrape: "true"
{{- end }}
{{- if hasKey .Values.rules "LiveReload" | ternary (not .Values.rules.LiveReload) false }}
checksum/rules: {{ include (print $.Template.BasePath "/configmap-rules.yaml") . | sha256sum }}
{{- end }}
labels:
{{- include "refinery.selectorLabels" . | nindent 4 }}
{{- with .Values.podLabels }}
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 4 }}
{{- end }}
serviceAccountName: {{ include "refinery.serviceAccountName" . }}
{{- if eq .Values.mode "statefulset" }}
# This makes the pod hostnames resolvable.
setHostnameAsFQDN: true
{{- end }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 4 }}
containers:
- name: {{ .Chart.Name }}
securityContext:
{{- toYaml .Values.securityContext | nindent 8 }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
command:
- "refinery"
- "-c"
- "/etc/refinery/config.yaml"
- "-r"
- "/etc/refinery/rules.yaml"
{{- with .Values.environment }}
env:
{{- toYaml . | nindent 8 }}
{{- end }}
ports:
- name: data
containerPort: 8080
protocol: TCP
- name: grpc
containerPort: 4317
protocol: TCP
- name: peer
containerPort: 8081
protocol: TCP
{{- if eq .Values.config.Metrics "prometheus" }}
- name: metrics
containerPort: 9090
protocol: TCP
{{- end }}
volumeMounts:
- name: refinery-config
mountPath: /etc/refinery/
{{- with .Values.extraVolumeMounts }}
{{- toYaml . | nindent 8 }}
{{- end }}
livenessProbe:
httpGet:
path: /alive
port: data
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 3
readinessProbe:
httpGet:
path: /alive
port: data
initialDelaySeconds: 0
periodSeconds: 3
failureThreshold: 5
resources:
{{- toYaml .Values.resources | nindent 8 }}
volumes:
- name: refinery-config
projected:
sources:
- configMap:
name: {{ include "refinery.fullname" . }}-config
items:
- key: config.yaml
path: config.yaml
- configMap:
{{- if .Values.config.RulesConfigMapName }}
name: {{ .Values.config.RulesConfigMapName }}
{{- else }}
name: {{ include "refinery.fullname" . }}-rules
{{- end }}
items:
- key: rules.yaml
path: rules.yaml
{{- with .Values.extraVolumes }}
{{- toYaml . | nindent 4 }}
{{- end }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 4 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 4 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 4 }}
{{- end }}
{{- end }}
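
Both workload variants render this partial under `template:` (via
`include "refinery.pod"`, as the deployment and statefulset templates below
show). A minimal values sketch exercising the optional knobs the partial
reads — the concrete values are illustrative, not chart defaults:

  podLabels:
    example.com/tier: sampling          # hypothetical label
  podAnnotations:
    example.com/owner: o11y             # hypothetical annotation
  environment:
    - name: REFINERY_EXAMPLE_VAR        # hypothetical variable
      value: "changeme"
  extraVolumeMounts: []                 # mounted into the refinery container
  extraVolumes: []                      # appended to the pod's volumes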
129 changes: 17 additions & 112 deletions charts/refinery/templates/deployment.yaml
@@ -1,3 +1,4 @@
{{- if eq .Values.mode "deployment" -}}
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -9,122 +10,26 @@ metadata:
annotations: {{ toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if eq .Values.mode "statefulset" }}
# Governing service to provide stable network ID for StatefulSet pods:
# https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id
serviceName: {{ include "refinery.fullname" . }}-cluster
# Refinery doesn't really have any state, so in theory there's no
# need for the controlled scale-up / scale-down of the default
# OrderedReady policy. However if all the pods come up at once while
# the redis peer list exists, most pods will crash loop because
# they're unable to reach some of the peers. The OrderedReady delay
# gives time for membership to expire, and makes it quicker overall
# unless you also take care to blow away the redis peer list when
# scaling.
podManagementPolicy: OrderedReady
{{- end }}
{{- if not .Values.autoscaling.enabled }}
replicas: {{ .Values.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "refinery.selectorLabels" . | nindent 6 }}
template:
metadata:
annotations:
checksum/config: {{ include (print $.Template.BasePath "/configmap-config.yaml") . | sha256sum }}
{{- with .Values.podAnnotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- if eq .Values.config.Metrics "prometheus" }}
prometheus.io/port: "9090"
prometheus.io/scrape: "true"
{{- end }}
{{- if hasKey .Values.rules "LiveReload" | ternary (not .Values.rules.LiveReload) false }}
checksum/rules: {{ include (print $.Template.BasePath "/configmap-rules.yaml") . | sha256sum }}
{{- end }}
labels:
{{- include "refinery.selectorLabels" . | nindent 8 }}
{{- with .Values.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "refinery.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
containers:
- name: {{ .Chart.Name }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
command:
- "refinery"
- "-c"
- "/etc/refinery/config.yaml"
- "-r"
- "/etc/refinery/rules.yaml"
{{- with .Values.environment }}
env:
{{- toYaml . | nindent 12 }}
{{- end }}
ports:
- name: data
containerPort: 8080
protocol: TCP
- name: grpc
containerPort: 4317
protocol: TCP
- name: peer
containerPort: 8081
protocol: TCP
{{- if eq .Values.config.Metrics "prometheus" }}
- name: metrics
containerPort: 9090
protocol: TCP
{{- end }}
volumeMounts:
- name: refinery-config
mountPath: /etc/refinery/
{{- with .Values.extraVolumeMounts }}
{{- toYaml . | nindent 12 }}
{{- end }}
livenessProbe:
httpGet:
path: /alive
port: data
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 3
readinessProbe:
httpGet:
path: /alive
port: data
initialDelaySeconds: 0
periodSeconds: 3
failureThreshold: 5
resources:
{{- toYaml .Values.resources | nindent 12 }}
volumes:
- name: refinery-config
projected:
sources:
- configMap:
name: {{ include "refinery.fullname" . }}-config
items:
- key: config.yaml
path: config.yaml
- configMap:
{{- if .Values.config.RulesConfigMapName }}
name: {{ .Values.config.RulesConfigMapName }}
{{- else }}
name: {{ include "refinery.fullname" . }}-rules
{{- end }}
items:
- key: rules.yaml
path: rules.yaml
{{- with .Values.extraVolumes }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- include "refinery.pod" . | nindent 4 }}
{{- end }}
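
Since the rich diff interleaves the removed inline pod spec with the
surviving lines, here is roughly what deployment.yaml reduces to after this
change (metadata elided; a sketch, not the verbatim file):

  {{- if eq .Values.mode "deployment" -}}
  apiVersion: apps/v1
  kind: Deployment
  metadata:
    ...
  spec:
    {{- if not .Values.autoscaling.enabled }}
    replicas: {{ .Values.replicaCount }}
    {{- end }}
    selector:
      matchLabels:
        {{- include "refinery.selectorLabels" . | nindent 6 }}
    template:
      {{- include "refinery.pod" . | nindent 4 }}
  {{- end }}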
19 changes: 19 additions & 0 deletions charts/refinery/templates/headless-service.yaml
@@ -0,0 +1,19 @@
{{- if eq .Values.mode "statefulset" -}}
# Governing service to provide stable network ID for StatefulSet pods:
# https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id
apiVersion: v1
kind: Service
metadata:
name: {{ include "refinery.fullname" . }}-cluster
namespace: {{ .Release.Namespace }}
labels:
{{- include "refinery.labels" . | nindent 4 }}
{{- with .Values.service.labels }}
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
type: ClusterIP
clusterIP: None
selector:
{{- include "refinery.selectorLabels" . | nindent 4 }}
{{- end }}
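
For illustration (hypothetical release `refinery`, namespace `default`,
three replicas), the headless service gives the StatefulSet pods per-pod DNS
records rather than a single virtual IP:

  # clusterIP: None means no load-balanced VIP; instead each pod resolves as
  #   refinery-0.refinery-cluster.default.svc.cluster.local
  #   refinery-1.refinery-cluster.default.svc.cluster.local
  #   refinery-2.refinery-cluster.default.svc.cluster.local
  # These names are what the pods advertise to their peers in statefulset mode.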
2 changes: 1 addition & 1 deletion charts/refinery/templates/hpa.yaml
@@ -9,7 +9,7 @@ metadata:
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
kind: {{ if eq .Values.mode "statefulset" }}StatefulSet{{ else }}Deployment{{ end }}
name: {{ include "refinery.fullname" . }}
minReplicas: {{ .Values.autoscaling.minReplicas }}
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
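
The ternary keeps the HPA pointed at whichever workload kind is active;
the rendered output looks roughly like this (release name `refinery`
assumed):

  # mode: deployment
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: refinery

  # mode: statefulset
  scaleTargetRef:
    apiVersion: apps/v1
    kind: StatefulSet
    name: refinery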
33 changes: 33 additions & 0 deletions charts/refinery/templates/statefulset.yaml
@@ -0,0 +1,33 @@
{{- if eq .Values.mode "statefulset" -}}
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: {{ include "refinery.fullname" . }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "refinery.labels" . | nindent 4 }}
{{- with .Values.deploymentAnnotations }}
annotations: {{ toYaml . | nindent 4 }}
{{- end }}
spec:
# Governing service to provide stable network ID for StatefulSet pods:
# https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id
serviceName: {{ include "refinery.fullname" . }}-cluster
# Refinery doesn't really have any state, so in theory there's no
# need for the controlled scale-up / scale-down of the default
# OrderedReady policy. However if all the pods come up at once while
# the redis peer list exists, most pods will crash loop because
# they're unable to reach some of the peers. The OrderedReady delay
# gives time for membership to expire, and makes it quicker overall
# unless you also take care to blow away the redis peer list when
# scaling.
podManagementPolicy: OrderedReady
{{- if not .Values.autoscaling.enabled }}
replicas: {{ .Values.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "refinery.selectorLabels" . | nindent 6 }}
template:
{{- include "refinery.pod" . | nindent 4 }}
{{- end }}
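
A quick way to sanity-check the new mode without a cluster is `helm
template` (a sketch; the chart path and release name are assumptions):

  # helm template refinery ./charts/refinery --set mode=statefulset
  # should emit, among the other resources:
  apiVersion: apps/v1
  kind: StatefulSet
  metadata:
    name: refinery
  spec:
    serviceName: refinery-cluster
    podManagementPolicy: OrderedReady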
19 changes: 16 additions & 3 deletions charts/refinery/values.yaml
@@ -1,5 +1,13 @@
# Default values for refinery.

## Deployment mode ##
#
# Set to either `deployment` or `statefulset`. The default is `deployment`, but
# `statefulset` may be preferable because it gives the refinery pods stable
# hostnames, so cluster membership stays stable even as individual pods are
# rescheduled.
mode: "deployment"

## Scaling Refinery ##
#
# Refinery is a stateful service and is not optimized for dynamic auto-scaling.
@@ -227,9 +235,14 @@ config:
UseTLS: false

# IdentifierInterfaceName is optional.
# Due to the nature of DNS in Kubernetes, it is recommended to set this value to the 'eth0' interface name.
# When configured the pod's IP will be used in the peer list
IdentifierInterfaceName: eth0
# When running as a deployment, pod names are not stable or resolvable, so
# it is recommended to set this value to the 'eth0' interface name. This
# way, refinery will use the pod's IP in the peer list.
#
# When running as a statefulset, pods have a stable and resolvable DNS
# name, so we can leave this empty and rely on the default, which calls
# `os.Hostname()`.
IdentifierInterfaceName: "{{ if eq .Values.mode \"statefulset\" }}{{ else }}eth0{{ end }}"

# UseIPV6Identifier is optional. If using IdentifierInterfaceName, Refinery will default to the first
# IPv4 unicast address it finds for the specified interface. If UseIPV6Identifier is used, will use
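
Because the value above is itself templated against `.Values.mode`
(presumably run through Helm's `tpl` when the config is rendered), it
resolves per mode roughly as:

  # mode: deployment   ->  IdentifierInterfaceName: "eth0"  (peer list uses the pod IP)
  # mode: statefulset  ->  IdentifierInterfaceName: ""      (falls back to os.Hostname())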
