From 85228ed3562ed0fc7f4cc80bf7b13d355a7f70fa Mon Sep 17 00:00:00 2001 From: Damien Ciabrini Date: Tue, 19 Dec 2023 14:50:36 +0100 Subject: [PATCH] Add support for clustered Redis Redis 6 comes with sentinel, which allows Redis to run as an A/P service with automatic failover. Update the redis controller to support deploying Redis as a standalone 1-pod Redis service, or a 3-pod A/P clustered service. Each redis pod hosts a container that runs the redis server, and another container that runs a sentinel for the quorated cluster management. --- controllers/redis/redis_controller.go | 100 +++++++++-- pkg/redis/service.go | 32 +++- pkg/redis/statefulset.go | 134 +++++++++++++++ pkg/redis/volumes.go | 146 ++++++++++++++++ templates/redis/bin/check_redis_endpoints.sh | 38 +++++ templates/redis/bin/common.sh | 85 ++++++++++ templates/redis/bin/redis_probe.sh | 17 ++ .../redis/bin/start_redis_replication.sh | 26 +++ templates/redis/bin/start_sentinel.sh | 28 ++++ templates/redis/config/config-sentinel.json | 20 +++ templates/redis/config/config.json | 19 +++ templates/redis/config/redis.conf.in | 37 +++++ templates/redis/config/sentinel.conf.in | 8 + tests/kuttl/tests/redis/01-assert.yaml | 138 ++++++++++++++++ tests/kuttl/tests/redis/01-deploy-redis.yaml | 6 + tests/kuttl/tests/redis/02-assert.yaml | 156 ++++++++++++++++++ tests/kuttl/tests/redis/02-ha-redis.yaml | 14 ++ tests/kuttl/tests/redis/03-assert.yaml | 15 ++ tests/kuttl/tests/redis/04-assert.yaml | 15 ++ tests/kuttl/tests/redis/04-failover.yaml | 6 + tests/kuttl/tests/redis/05-assert.yaml | 17 ++ 21 files changed, 1044 insertions(+), 13 deletions(-) create mode 100644 pkg/redis/statefulset.go create mode 100644 pkg/redis/volumes.go create mode 100755 templates/redis/bin/check_redis_endpoints.sh create mode 100644 templates/redis/bin/common.sh create mode 100755 templates/redis/bin/redis_probe.sh create mode 100755 templates/redis/bin/start_redis_replication.sh create mode 100755 
templates/redis/bin/start_sentinel.sh create mode 100644 templates/redis/config/config-sentinel.json create mode 100644 templates/redis/config/config.json create mode 100644 templates/redis/config/redis.conf.in create mode 100644 templates/redis/config/sentinel.conf.in create mode 100644 tests/kuttl/tests/redis/01-assert.yaml create mode 100644 tests/kuttl/tests/redis/01-deploy-redis.yaml create mode 100644 tests/kuttl/tests/redis/02-assert.yaml create mode 100644 tests/kuttl/tests/redis/02-ha-redis.yaml create mode 100644 tests/kuttl/tests/redis/03-assert.yaml create mode 100644 tests/kuttl/tests/redis/04-assert.yaml create mode 100644 tests/kuttl/tests/redis/04-failover.yaml create mode 100644 tests/kuttl/tests/redis/05-assert.yaml diff --git a/controllers/redis/redis_controller.go b/controllers/redis/redis_controller.go index bbb1a03f..2aa9c7a3 100644 --- a/controllers/redis/redis_controller.go +++ b/controllers/redis/redis_controller.go @@ -18,6 +18,7 @@ package redis import ( "context" + "fmt" "time" "k8s.io/apimachinery/pkg/runtime" @@ -28,7 +29,10 @@ import ( "github.com/go-logr/logr" redisv1beta1 "github.com/openstack-k8s-operators/infra-operator/apis/redis/v1beta1" + "github.com/openstack-k8s-operators/lib-common/modules/common/configmap" + "github.com/openstack-k8s-operators/lib-common/modules/common/env" "github.com/openstack-k8s-operators/lib-common/modules/common/helper" + "github.com/openstack-k8s-operators/lib-common/modules/common/util" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -37,9 +41,11 @@ import ( redis "github.com/openstack-k8s-operators/infra-operator/pkg/redis" condition "github.com/openstack-k8s-operators/lib-common/modules/common/condition" - commondeployment "github.com/openstack-k8s-operators/lib-common/modules/common/deployment" + common_rbac "github.com/openstack-k8s-operators/lib-common/modules/common/rbac" commonservice "github.com/openstack-k8s-operators/lib-common/modules/common/service" + commonstatefulset 
"github.com/openstack-k8s-operators/lib-common/modules/common/statefulset" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) // GetLogger returns a logger object with a prefix of "controller.name" and additional controller context fields @@ -133,6 +139,8 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct cl := condition.CreateList( // endpoint for adoption redirect condition.UnknownCondition(condition.ExposeServiceReadyCondition, condition.InitReason, condition.ExposeServiceReadyInitMessage), + // configmap generation + condition.UnknownCondition(condition.ServiceConfigReadyCondition, condition.InitReason, condition.ServiceConfigReadyInitMessage), // redis pods ready condition.UnknownCondition(condition.DeploymentReadyCondition, condition.InitReason, condition.DeploymentReadyInitMessage), // service account, role, rolebinding conditions @@ -172,6 +180,36 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct return rbacResult, nil } + // Redis config maps + configMapVars := make(map[string]env.Setter) + err = r.generateConfigMaps(ctx, helper, instance, &configMapVars) + if err != nil { + instance.Status.Conditions.Set(condition.FalseCondition( + condition.ServiceConfigReadyCondition, + condition.ErrorReason, + condition.SeverityWarning, + condition.ServiceConfigReadyErrorMessage, + err.Error())) + return ctrl.Result{}, fmt.Errorf("error calculating configmap hash: %w", err) + } + instance.Status.Conditions.MarkTrue(condition.ServiceConfigReadyCondition, condition.ServiceConfigReadyMessage) + + // the headless service provides DNS entries for pods + // the name of the resource must match the name of the app selector + pkghl := redis.HeadlessService(instance) + headless := &corev1.Service{ObjectMeta: pkghl.ObjectMeta} + _, err = controllerutil.CreateOrPatch(ctx, r.Client, headless, func() error { + headless.Spec = pkghl.Spec + err := controllerutil.SetOwnerReference(instance, headless, 
r.Client.Scheme()) + if err != nil { + return err + } + return nil + }) + if err != nil { + return ctrl.Result{}, err + } + // Service to expose Redis pods commonsvc, err := commonservice.NewService(redis.Service(instance), time.Duration(5)*time.Second, nil) if err != nil { @@ -195,30 +233,70 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct } instance.Status.Conditions.MarkTrue(condition.ExposeServiceReadyCondition, condition.ExposeServiceReadyMessage) - // Deployment - commondeployment := commondeployment.NewDeployment(redis.Deployment(instance), time.Duration(5)*time.Second) - sfres, sferr := commondeployment.CreateOrPatch(ctx, helper) - if sferr != nil { - return sfres, sferr - } - deployment := commondeployment.GetDeployment() - // // Reconstruct the state of the redis resource based on the deployment and its pods // - if deployment.Status.ReadyReplicas > 0 { + // Statefulset + commonstatefulset := commonstatefulset.NewStatefulSet(redis.StatefulSet(instance), 5) + sfres, sferr := commonstatefulset.CreateOrPatch(ctx, helper) + if sferr != nil { + return sfres, sferr + } + statefulset := commonstatefulset.GetStatefulSet() + + if statefulset.Status.ReadyReplicas > 0 { instance.Status.Conditions.MarkTrue(condition.DeploymentReadyCondition, condition.DeploymentReadyMessage) } return ctrl.Result{}, nil } +// generateConfigMaps ensures the scripts and config-data config maps for a redis instance +func (r *Reconciler) generateConfigMaps( + ctx context.Context, + h *helper.Helper, + instance *redisv1beta1.Redis, + envVars *map[string]env.Setter, +) error { + templateParameters := make(map[string]interface{}) + customData := make(map[string]string) + + cms := []util.Template{ + // ScriptsConfigMap + { + Name: fmt.Sprintf("%s-scripts", instance.Name), + Namespace: instance.Namespace, + Type: util.TemplateTypeScripts, + InstanceType: instance.Kind, + Labels: map[string]string{}, + }, + // ConfigMap + { + Name: fmt.Sprintf("%s-config-data", 
instance.Name), + Namespace: instance.Namespace, + Type: util.TemplateTypeConfig, + InstanceType: instance.Kind, + CustomData: customData, + ConfigOptions: templateParameters, + Labels: map[string]string{}, + }, + } + + err := configmap.EnsureConfigMaps(ctx, h, instance, cms, envVars) + if err != nil { + util.LogErrorForObject(h, err, "Unable to retrieve or create config maps", instance) + return err + } + + return nil +} + // SetupWithManager sets up the controller with the Manager. func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&redisv1beta1.Redis{}). - Owns(&appsv1.Deployment{}). + Owns(&appsv1.StatefulSet{}). Owns(&corev1.Service{}). Owns(&corev1.ServiceAccount{}). Owns(&rbacv1.Role{}). diff --git a/pkg/redis/service.go b/pkg/redis/service.go index 66e0cc77..f0c62be8 100644 --- a/pkg/redis/service.go +++ b/pkg/redis/service.go @@ -5,6 +5,7 @@ import ( labels "github.com/openstack-k8s-operators/lib-common/modules/common/labels" service "github.com/openstack-k8s-operators/lib-common/modules/common/service" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // Service exposes redis pods for a redis CR @@ -19,16 +20,43 @@ func Service(m *redisv1beta1.Redis) *corev1.Service { Namespace: m.GetNamespace(), Labels: labels, Selector: map[string]string{ - "app": "redis", + "app": "redis", + "cr": "redis-redis", + "redis/master": "true", }, Port: service.GenericServicePort{ Name: "redis", Port: 6379, Protocol: "TCP", }, - ClusterIP: "None", } svc := service.GenericService(details) return svc } + +// HeadlessService - service to give redis pods connectivity via DNS +func HeadlessService(m *redisv1beta1.Redis) *corev1.Service { + dep := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: m.GetName() + "-" + "redis", + Namespace: m.GetNamespace(), + }, + Spec: corev1.ServiceSpec{ + Type: "ClusterIP", + ClusterIP: "None", + Ports: []corev1.ServicePort{ + {Name: "redis", Protocol: 
"TCP", Port: 6379}, + {Name: "sentinel", Protocol: "TCP", Port: 26379}, + }, + Selector: map[string]string{ + "app": "redis", + "cr": "redis-redis", + }, + // This is required to let pods communicate when + // they are still in Starting state + PublishNotReadyAddresses: true, + }, + } + return dep +} diff --git a/pkg/redis/statefulset.go b/pkg/redis/statefulset.go new file mode 100644 index 00000000..dc9bf509 --- /dev/null +++ b/pkg/redis/statefulset.go @@ -0,0 +1,134 @@ +package redis + +import ( + "strconv" + + redisv1beta1 "github.com/openstack-k8s-operators/infra-operator/apis/redis/v1beta1" + labels "github.com/openstack-k8s-operators/lib-common/modules/common/labels" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +// StatefulSet returns a StatefulSet resource for the Redis CR +func StatefulSet(r *redisv1beta1.Redis) *appsv1.StatefulSet { + matchls := map[string]string{ + "app": "redis", + "cr": "redis-" + r.Name, + "owner": "infra-operator", + } + ls := labels.GetLabels(r, "redis", matchls) + + livenessProbe := &corev1.Probe{ + // TODO might need tuning + TimeoutSeconds: 5, + PeriodSeconds: 3, + InitialDelaySeconds: 3, + } + readinessProbe := &corev1.Probe{ + // TODO might need tuning + TimeoutSeconds: 5, + PeriodSeconds: 5, + InitialDelaySeconds: 5, + } + sentinelLivenessProbe := &corev1.Probe{ + // TODO might need tuning + TimeoutSeconds: 5, + PeriodSeconds: 3, + InitialDelaySeconds: 3, + } + sentinelReadinessProbe := &corev1.Probe{ + // TODO might need tuning + TimeoutSeconds: 5, + PeriodSeconds: 5, + InitialDelaySeconds: 5, + } + + // TODO might want to disable probes in 'Debug' mode + livenessProbe.TCPSocket = &corev1.TCPSocketAction{ + Port: intstr.IntOrString{Type: intstr.Int, IntVal: int32(6379)}, + } + readinessProbe.TCPSocket = &corev1.TCPSocketAction{ + Port: intstr.IntOrString{Type: intstr.Int, IntVal: int32(6379)}, + } + 
sentinelLivenessProbe.TCPSocket = &corev1.TCPSocketAction{ + Port: intstr.IntOrString{Type: intstr.Int, IntVal: int32(26379)}, + } + sentinelReadinessProbe.TCPSocket = &corev1.TCPSocketAction{ + Port: intstr.IntOrString{Type: intstr.Int, IntVal: int32(26379)}, + } + name := r.Name + "-" + "redis" + sts := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: r.Namespace, + }, + Spec: appsv1.StatefulSetSpec{ + ServiceName: name, + Replicas: r.Spec.Replicas, + Selector: &metav1.LabelSelector{ + MatchLabels: ls, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: ls, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: r.RbacResourceName(), + Containers: []corev1.Container{{ + Image: r.Spec.ContainerImage, + Command: []string{"/var/lib/operator-scripts/start_redis_replication.sh"}, + Name: "redis", + Env: []corev1.EnvVar{{ + Name: "KOLLA_CONFIG_STRATEGY", + Value: "COPY_ALWAYS", + }}, + VolumeMounts: getRedisVolumeMounts(), + Ports: []corev1.ContainerPort{{ + ContainerPort: 6379, + Name: "redis", + }}, + LivenessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + Exec: &corev1.ExecAction{ + Command: []string{"/var/lib/operator-scripts/redis_probe.sh", "liveness"}, + }, + }, + }, + ReadinessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + Exec: &corev1.ExecAction{ + Command: []string{"/var/lib/operator-scripts/redis_probe.sh", "readiness"}, + }, + }, + }, + }, { + Image: r.Spec.ContainerImage, + Command: []string{"/var/lib/operator-scripts/start_sentinel.sh"}, + + Name: "sentinel", + Env: []corev1.EnvVar{{ + Name: "SENTINEL_QUORUM", + Value: strconv.Itoa((int(*r.Spec.Replicas) / 2) + 1), + }, { + Name: "KOLLA_CONFIG_STRATEGY", + Value: "COPY_ALWAYS", + }}, + VolumeMounts: getSentinelVolumeMounts(), + Ports: []corev1.ContainerPort{{ + ContainerPort: 26379, + Name: "sentinel", + }}, + ReadinessProbe: sentinelReadinessProbe, + LivenessProbe: sentinelLivenessProbe, + }, + }, + Volumes: 
getVolumes(r), + }, + }, + }, + } + + return sts +} diff --git a/pkg/redis/volumes.go b/pkg/redis/volumes.go new file mode 100644 index 00000000..af2ab73f --- /dev/null +++ b/pkg/redis/volumes.go @@ -0,0 +1,146 @@ +package redis + +import ( + "fmt" + + redisv1beta1 "github.com/openstack-k8s-operators/infra-operator/apis/redis/v1beta1" + corev1 "k8s.io/api/core/v1" +) + +func getVolumes(r *redisv1beta1.Redis) []corev1.Volume { + scriptsPerms := int32(0755) + vols := []corev1.Volume{ + { + Name: "kolla-config", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: fmt.Sprintf("%s-config-data", r.Name), + }, + Items: []corev1.KeyToPath{ + { + Key: "config.json", + Path: "config.json", + }, + }, + }, + }, + }, + { + Name: "kolla-config-sentinel", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: fmt.Sprintf("%s-config-data", r.Name), + }, + Items: []corev1.KeyToPath{ + { + Key: "config-sentinel.json", + Path: "config.json", + }, + }, + }, + }, + }, + { + Name: "generated-config-data", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }, + { + Name: "config-data", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: fmt.Sprintf("%s-config-data", r.Name), + }, + Items: []corev1.KeyToPath{ + { + Key: "sentinel.conf.in", + Path: "var/lib/redis/sentinel.conf.in", + }, + { + Key: "redis.conf.in", + Path: "var/lib/redis/redis.conf.in", + }, + }, + }, + }, + }, + { + Name: "operator-scripts", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: r.Name + "-scripts", + }, + Items: []corev1.KeyToPath{ + { + Key: "start_redis_replication.sh", + Path: "start_redis_replication.sh", + }, + { + 
Key: "start_sentinel.sh", + Path: "start_sentinel.sh", + }, + { + Key: "redis_probe.sh", + Path: "redis_probe.sh", + }, + { + Key: "check_redis_endpoints.sh", + Path: "check_redis_endpoints.sh", + }, + { + Key: "common.sh", + Path: "common.sh", + }, + }, + DefaultMode: &scriptsPerms, + }, + }, + }, + } + return vols +} + +func getRedisVolumeMounts() []corev1.VolumeMount { + vm := []corev1.VolumeMount{{ + MountPath: "/var/lib/kolla/config_files/src", + ReadOnly: true, + Name: "config-data", + }, { + MountPath: "/var/lib/kolla/config_files/generated", + Name: "generated-config-data", + }, { + MountPath: "/var/lib/operator-scripts", + ReadOnly: true, + Name: "operator-scripts", + }, { + MountPath: "/var/lib/kolla/config_files", + ReadOnly: true, + Name: "kolla-config", + }} + return vm +} + +func getSentinelVolumeMounts() []corev1.VolumeMount { + vm := []corev1.VolumeMount{{ + MountPath: "/var/lib/kolla/config_files/src", + ReadOnly: true, + Name: "config-data", + }, { + MountPath: "/var/lib/kolla/config_files/generated", + Name: "generated-config-data", + }, { + MountPath: "/var/lib/operator-scripts", + ReadOnly: true, + Name: "operator-scripts", + }, { + MountPath: "/var/lib/kolla/config_files", + ReadOnly: true, + Name: "kolla-config-sentinel", + }} + return vm +} diff --git a/templates/redis/bin/check_redis_endpoints.sh b/templates/redis/bin/check_redis_endpoints.sh new file mode 100755 index 00000000..dc4eb671 --- /dev/null +++ b/templates/redis/bin/check_redis_endpoints.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +. /var/lib/operator-scripts/common.sh + +# When the master changes because of a failover, redis notifies this +# script with the following arguments: +# master-name role state from-ip from-port to-ip to-port + +log "$0 called with arguments: $*" + +CLUSTER_NAME=$1 +POD_ROLE=$2 +STATE=$3 +OLD_MASTER=$4 +NEW_MASTER=$6 + +OLD_POD=$(echo $OLD_MASTER | cut -d. -f1) +NEW_POD=$(echo $NEW_MASTER | cut -d. 
-f1) + +if [ "$POD_ROLE" = "leader" ]; then + log "Preparing the endpoint for the failover ${OLD_POD} -> ${NEW_POD}" + + log "Removing ${OLD_POD} from the Redis service's endpoint" + remove_pod_label $OLD_POD redis~1master + if [ $? != 0 ]; then + log_error "Could not remove service endpoint. Aborting" + exit 1 + fi + + log "Setting ${NEW_POD} as the new endpoint for the Redis service" + set_pod_label $NEW_POD redis~1master + if [ $? != 0 ]; then + log_error "Could not add service endpoint. Aborting" + exit 1 + fi +else + log "No action taken since we were an observer during the failover" +fi diff --git a/templates/redis/bin/common.sh b/templates/redis/bin/common.sh new file mode 100644 index 00000000..a2d57c01 --- /dev/null +++ b/templates/redis/bin/common.sh @@ -0,0 +1,85 @@ +# Environment variable common to all scripts +APISERVER=https://kubernetes.default.svc +SERVICEACCOUNT=/var/run/secrets/kubernetes.io/serviceaccount +NAMESPACE=$(cat ${SERVICEACCOUNT}/namespace) +TOKEN=$(cat ${SERVICEACCOUNT}/token) +CACERT=${SERVICEACCOUNT}/ca.crt + +TIMEOUT=3 + +PODNAME=$(echo $HOSTNAME | cut -d. 
-f1,2) +PODIP=$(grep "${PODNAME}" /etc/hosts | cut -d$'\t' -f1) +# redis-redis-0.redis-redis.openstack.svc.cluster.local +PODFQDN=$(getent hosts "${PODIP}" | awk '{print $2}') +# redis-redis.openstack.svc.cluster.local +SVCFQDN=$(echo "${PODFQDN}" | sed 's/^[^\.]*.//') + +function log() { + echo "$(date +%F_%H_%M_%S) $*" +} + +function log_error() { + echo "$(date +%F_%H_%M_%S) ERROR: $*" +} + +function generate_configs() { + # Copying config files except template files + mkdir -p /var/lib/kolla/config_files/generated + tar -C /var/lib/kolla/config_files --exclude '..*' --exclude '*.in' -h -c src | tar -C /var/lib/kolla/config_files/generated -x --strip=1 + # Generating config files from templates + cd /var/lib/kolla/config_files/src + for cfg in $(find -L * -name '*.conf.in'); do + log "Generating config file from template $PWD/${cfg}" + sed -e "s/{ PODNAME }/${PODNAME}/" -e "s/{ PODIP }/${PODIP}/" -e "s/{ PODFQDN }/${PODFQDN}/" "${cfg}" > "/var/lib/kolla/config_files/generated/${cfg%.in}" + done +} + +function is_bootstrap_pod() { + echo "$1" | grep -qe '-0$' +} + +function extract() { + local var="$1" + local output="$2" + # parse curl vars as well as kube api error fields + echo "$output" | awk -F'[:,]' "/\"?${var}\"?:/ {print \$2; exit}" +} + +function configure_pod_label() { + local pod="$1" + local patch="$2" + local success="$3" + local curlvars="\nexitcode:%{exitcode}\nerrormsg:%{errormsg}\nhttpcode:%{response_code}\n" + + response=$(curl -s -w "${curlvars}" --cacert ${CACERT} --header "Content-Type:application/json-patch+json" --header "Authorization: Bearer ${TOKEN}" --request PATCH --data "$patch" ${APISERVER}/api/v1/namespaces/${NAMESPACE}/pods/${pod}) + + exitcode=$(extract exitcode "$response") + if [ $exitcode -ne 0 ]; then + errormsg=$(extract errormsg "$response") + log_error "Error when running curl: ${errormsg} (${exitcode})" + return 1 + fi + + httpcode=$(extract httpcode "$response") + if echo "${httpcode}" | grep -v -E "^${success}$"; then + 
message=$(extract message "$response") + log_error "Error when calling API server: ${message} (${httpcode})" + return 1 + fi +} + +function remove_pod_label() { + local pod="$1" + local label="$2" + local patch="[{\"op\": \"remove\", \"path\": \"/metadata/labels/${label}\"}]" + # 200: OK, 422: not found + configure_pod_label $pod "$patch" "(200|422)" +} + +function set_pod_label() { + local pod="$1" + local label="$2" + local patch="[{\"op\": \"add\", \"path\": \"/metadata/labels/${label}\", \"value\": \"true\"}]" + # 200: OK + configure_pod_label $pod "$patch" "200" +} diff --git a/templates/redis/bin/redis_probe.sh b/templates/redis/bin/redis_probe.sh new file mode 100755 index 00000000..c199a250 --- /dev/null +++ b/templates/redis/bin/redis_probe.sh @@ -0,0 +1,17 @@ +#!/bin/bash +set -eux + +case "$1" in + readiness) + # ready if we're the master or if we're a slave connected to the current master + output=$(redis-cli info replication | tr -d '\r') + declare -A state + while IFS=: read -r key value; do state[$key]=$value; done < <(echo "$output") + [[ "${state[role]}" == "master" ]] || [[ "${state[role]}" == "slave" && "${state[master_link_status]}" == "up" ]] + ;; + liveness) + redis-cli -e ping >/dev/null;; + *) + echo "Invalid probe option '$1'" + exit 1;; +esac diff --git a/templates/redis/bin/start_redis_replication.sh b/templates/redis/bin/start_redis_replication.sh new file mode 100755 index 00000000..9dc6aea7 --- /dev/null +++ b/templates/redis/bin/start_redis_replication.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +. /var/lib/operator-scripts/common.sh + +generate_configs +sudo -E kolla_set_configs + +# 1. check if a redis cluster is already running by contacting sentinel +output=$(timeout ${TIMEOUT} redis-cli -h ${SVCFQDN} -p 26379 sentinel master redis) +if [ $? 
-eq 0 ]; then + master=$(echo "$output" | awk '/^ip$/ {getline; print $0; exit}') + # TODO skip if no master was found + log "Connecting to the existing Redis cluster (master: ${master})" + exec redis-server /var/lib/redis/redis.conf --protected-mode no --replicaof "$master" 6379 +fi + +# 2. else bootstrap a new cluster (assume we should be the first redis pod) +if is_bootstrap_pod $PODNAME; then + log "Bootstrapping a new Redis cluster from ${PODNAME}" + set_pod_label $PODNAME redis~1master + exec redis-server /var/lib/redis/redis.conf --protected-mode no +fi + +# 3. else this is an error, exit and let the pod restart and try again +echo "Could not connect to a redis cluster" +exit 1 diff --git a/templates/redis/bin/start_sentinel.sh b/templates/redis/bin/start_sentinel.sh new file mode 100755 index 00000000..e51fce17 --- /dev/null +++ b/templates/redis/bin/start_sentinel.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +. /var/lib/operator-scripts/common.sh + +generate_configs +sudo -E kolla_set_configs + +# 1. check if a redis cluster is already running by contacting sentinel +output=$(timeout ${TIMEOUT} redis-cli -h ${SVCFQDN} -p 26379 sentinel master redis) +if [ $? -eq 0 ]; then + master=$(echo "$output" | awk '/^ip$/ {getline; print $0; exit}') + # TODO skip if no master was found + log "Connecting to the existing sentinel cluster (master: $master)" + echo "sentinel monitor redis ${master} 6379 ${SENTINEL_QUORUM}" >> /var/lib/redis/sentinel.conf + exec redis-sentinel /var/lib/redis/sentinel.conf +fi + +# 2. else let the pod's redis server bootstrap a new cluster and monitor it +# (assume we should be the first redis pod) +if is_bootstrap_pod $PODNAME; then + log "Bootstrapping a new sentinel cluster" + echo "sentinel monitor redis ${PODFQDN} 6379 ${SENTINEL_QUORUM}" >> /var/lib/redis/sentinel.conf + exec redis-sentinel /var/lib/redis/sentinel.conf +fi + +# 3. 
else this is an error, exit and let the pod restart and try again +echo "Could not connect to a sentinel cluster" +exit 1 diff --git a/templates/redis/config/config-sentinel.json b/templates/redis/config/config-sentinel.json new file mode 100644 index 00000000..0409c8b7 --- /dev/null +++ b/templates/redis/config/config-sentinel.json @@ -0,0 +1,20 @@ +{ + "command": "redis-sentinel /var/lib/redis/sentinel.conf", + "config_files": [ + { + "dest": "/", + "merge": true, + "preserve_properties": true, + "optional": true, + "source": "/var/lib/kolla/config_files/generated/*" + } + ], + "permissions": [ + { + "owner": "redis:redis", + "perm": "0755", + "path": "/var/lib/redis", + "recursive": true + } + ] +} diff --git a/templates/redis/config/config.json b/templates/redis/config/config.json new file mode 100644 index 00000000..d0990b52 --- /dev/null +++ b/templates/redis/config/config.json @@ -0,0 +1,19 @@ +{ + "command": "redis-server /var/lib/redis/redis.conf", + "config_files": [ + { + "dest": "/", + "merge": true, + "preserve_properties": true, + "optional": true, + "source": "/var/lib/kolla/config_files/generated/*" + } + ], + "permissions": [ + { + "owner": "redis:redis", + "path": "/var/lib/redis", + "recursive": true + } + ] +} diff --git a/templates/redis/config/redis.conf.in b/templates/redis/config/redis.conf.in new file mode 100644 index 00000000..bd802920 --- /dev/null +++ b/templates/redis/config/redis.conf.in @@ -0,0 +1,37 @@ +dir /var/lib/redis +bind { PODFQDN } 127.0.0.1 +replica-announce-ip { PODFQDN } +port 6379 +tcp-backlog 511 +timeout 0 +tcp-keepalive 300 +daemonize no +loglevel notice +logfile "" +databases 16 +always-show-logo no +set-proc-title yes +proc-title-template "{title} {listen-addr} {server-mode}" +stop-writes-on-bgsave-error yes +rdbcompression yes +rdbchecksum yes +dbfilename dump.rdb +rdb-del-sync-files no +replica-serve-stale-data yes +replica-read-only yes +repl-diskless-sync no +repl-diskless-sync-delay 5 +repl-diskless-load 
disabled +repl-disable-tcp-nodelay no +replica-priority 100 +acllog-max-len 128 +lazyfree-lazy-eviction no +lazyfree-lazy-expire no +lazyfree-lazy-server-del no +replica-lazy-flush no +lazyfree-lazy-user-del no +lazyfree-lazy-user-flush no +oom-score-adj no +oom-score-adj-values 0 200 800 +disable-thp yes +appendonly no diff --git a/templates/redis/config/sentinel.conf.in b/templates/redis/config/sentinel.conf.in new file mode 100644 index 00000000..4c38a664 --- /dev/null +++ b/templates/redis/config/sentinel.conf.in @@ -0,0 +1,8 @@ +dir /var/lib/redis +bind { PODFQDN } 127.0.0.1 +sentinel resolve-hostnames yes +sentinel announce-hostnames yes +sentinel announce-ip { PODFQDN } +sentinel down-after-milliseconds redis 10000 +sentinel failover-timeout redis 20000 +sentinel client-reconfig-script redis /var/lib/operator-scripts/check_redis_endpoints.sh diff --git a/tests/kuttl/tests/redis/01-assert.yaml b/tests/kuttl/tests/redis/01-assert.yaml new file mode 100644 index 00000000..bdda6a83 --- /dev/null +++ b/tests/kuttl/tests/redis/01-assert.yaml @@ -0,0 +1,138 @@ +# +# Check for: +# +# - 1 Redis CR +# - 1 stateful set to manage redis pod +# - 1 pod +# - 1 config map for the scripts +# - 1 config map for the generated redis config +# - 1 Headless Service for the cluster +# - 1 Service exposing the Redis port +# + +apiVersion: redis.openstack.org/v1beta1 +kind: Redis +metadata: + name: redis +spec: + replicas: 1 +status: + conditions: + - message: Setup complete + reason: Ready + status: "True" + type: Ready + - message: Deployment completed + reason: Ready + status: "True" + type: DeploymentReady + - message: Exposing service completed + reason: Ready + status: "True" + type: ExposeServiceReady + - message: RoleBinding created + reason: Ready + status: "True" + type: RoleBindingReady + - message: Role created + reason: Ready + status: "True" + type: RoleReady + - message: ServiceAccount created + reason: Ready + status: "True" + type: ServiceAccountReady + - message: 
Service config create completed + reason: Ready + status: "True" + type: ServiceConfigReady +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: redis-redis +spec: + replicas: 1 + selector: + matchLabels: + app: redis + cr: redis-redis + redis/name: redis + serviceName: redis-redis + template: + metadata: + labels: + app: redis + cr: redis-redis + redis/name: redis + spec: + containers: + - name: redis + ports: + - containerPort: 6379 + name: redis + protocol: TCP + - name: sentinel + ports: + - containerPort: 26379 + name: sentinel + protocol: TCP + serviceAccount: redis-redis + serviceAccountName: redis-redis +status: + availableReplicas: 1 + readyReplicas: 1 + replicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + name: redis-redis-0 +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: redis-config-data +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: redis-scripts +--- +apiVersion: v1 +kind: Service +metadata: + name: redis-redis +spec: + ports: + - name: redis + port: 6379 + protocol: TCP + targetPort: 6379 + - name: sentinel + port: 26379 + protocol: TCP + targetPort: 26379 + selector: + app: redis + cr: redis-redis +--- +apiVersion: v1 +kind: Service +metadata: + name: redis +spec: + ports: + - name: redis + port: 6379 + protocol: TCP + targetPort: 6379 + selector: + app: redis + cr: redis-redis + redis/master: "true" +--- +apiVersion: v1 +kind: Endpoints +metadata: + name: redis diff --git a/tests/kuttl/tests/redis/01-deploy-redis.yaml b/tests/kuttl/tests/redis/01-deploy-redis.yaml new file mode 100644 index 00000000..aa3e2f31 --- /dev/null +++ b/tests/kuttl/tests/redis/01-deploy-redis.yaml @@ -0,0 +1,6 @@ +apiVersion: redis.openstack.org/v1beta1 +kind: Redis +metadata: + name: redis +spec: + replicas: 1 diff --git a/tests/kuttl/tests/redis/02-assert.yaml b/tests/kuttl/tests/redis/02-assert.yaml new file mode 100644 index 00000000..ba631be1 --- /dev/null +++ b/tests/kuttl/tests/redis/02-assert.yaml @@ -0,0 +1,156 @@ +# +# Check for: 
+# +# - 1 Redis CR +# - 1 stateful set to manage redis pod +# - 3 pod +# - 1 config map for the scripts +# - 1 config map for the generated redis config +# - 1 Headless Service for the cluster +# - 1 Service exposing the Redis port +# + +apiVersion: redis.openstack.org/v1beta1 +kind: Redis +metadata: + name: redis +spec: + replicas: 3 +status: + conditions: + - message: Setup complete + reason: Ready + status: "True" + type: Ready + - message: Deployment completed + reason: Ready + status: "True" + type: DeploymentReady + - message: Exposing service completed + reason: Ready + status: "True" + type: ExposeServiceReady + - message: RoleBinding created + reason: Ready + status: "True" + type: RoleBindingReady + - message: Role created + reason: Ready + status: "True" + type: RoleReady + - message: ServiceAccount created + reason: Ready + status: "True" + type: ServiceAccountReady + - message: Service config create completed + reason: Ready + status: "True" + type: ServiceConfigReady +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: redis-redis +spec: + replicas: 3 + selector: + matchLabels: + app: redis + cr: redis-redis + redis/name: redis + serviceName: redis-redis + template: + metadata: + labels: + app: redis + cr: redis-redis + redis/name: redis + spec: + containers: + - name: redis + ports: + - containerPort: 6379 + name: redis + protocol: TCP + - name: sentinel + ports: + - containerPort: 26379 + name: sentinel + protocol: TCP + serviceAccount: redis-redis + serviceAccountName: redis-redis +status: + availableReplicas: 3 + readyReplicas: 3 + replicas: 3 +--- +# at deployment time, pod-0 is always the active redis +# the other two pods should be passive +apiVersion: v1 +kind: Pod +metadata: + labels: + redis/master: "true" + name: redis-redis-0 +--- +apiVersion: v1 +kind: Pod +metadata: + name: redis-redis-1 +--- +apiVersion: v1 +kind: Pod +metadata: + name: redis-redis-2 +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: redis-config-data 
+--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: redis-scripts +--- +apiVersion: v1 +kind: Service +metadata: + name: redis-redis +spec: + ports: + - name: redis + port: 6379 + protocol: TCP + targetPort: 6379 + - name: sentinel + port: 26379 + protocol: TCP + targetPort: 26379 + selector: + app: redis + cr: redis-redis +--- +apiVersion: v1 +kind: Service +metadata: + name: redis +spec: + ports: + - name: redis + port: 6379 + protocol: TCP + targetPort: 6379 + selector: + app: redis + cr: redis-redis + redis/master: "true" +--- +apiVersion: v1 +kind: Endpoints +metadata: + name: redis +subsets: +- addresses: + - targetRef: + name: redis-redis-0 diff --git a/tests/kuttl/tests/redis/02-ha-redis.yaml b/tests/kuttl/tests/redis/02-ha-redis.yaml new file mode 100644 index 00000000..ed9ce9bf --- /dev/null +++ b/tests/kuttl/tests/redis/02-ha-redis.yaml @@ -0,0 +1,14 @@ +# delete the previous 1-node redis if it exists +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +delete: + - apiVersion: redis.openstack.org/v1beta1 + kind: Redis + name: redis +--- +apiVersion: redis.openstack.org/v1beta1 +kind: Redis +metadata: + name: redis +spec: + replicas: 3 diff --git a/tests/kuttl/tests/redis/03-assert.yaml b/tests/kuttl/tests/redis/03-assert.yaml new file mode 100644 index 00000000..56f2204e --- /dev/null +++ b/tests/kuttl/tests/redis/03-assert.yaml @@ -0,0 +1,15 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: + - script: | + set -e + SENTINELDATA=$(oc rsh -n $NAMESPACE -c sentinel redis-redis-0 redis-cli -p 26379 info | grep master | tr ',' '\n') + # there should be 1 master + echo "$SENTINELDATA" | grep -w sentinel_masters:1 + # there should be 2 slaves + echo "$SENTINELDATA" | grep -w slaves=2 + # there should be 3 connected sentinels for quorum + echo "$SENTINELDATA" | grep -w sentinels=3 + # exactly one pod must be accessible from the redis service (jq -e exits non-zero otherwise) + oc -n $NAMESPACE get endpoints redis -o json | jq -e '.subsets[0].addresses | length == 1' diff
--git a/tests/kuttl/tests/redis/04-assert.yaml b/tests/kuttl/tests/redis/04-assert.yaml new file mode 100644 index 00000000..3a1aef84 --- /dev/null +++ b/tests/kuttl/tests/redis/04-assert.yaml @@ -0,0 +1,15 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: redis-redis +status: + availableReplicas: 3 + readyReplicas: 3 + replicas: 3 +--- +# wait for a pod to be designated as the new redis master +apiVersion: v1 +kind: Pod +metadata: + labels: + redis/master: "true" diff --git a/tests/kuttl/tests/redis/04-failover.yaml b/tests/kuttl/tests/redis/04-failover.yaml new file mode 100644 index 00000000..09f94e39 --- /dev/null +++ b/tests/kuttl/tests/redis/04-failover.yaml @@ -0,0 +1,6 @@ +# delete the redis master (always the first pod right after deployment) +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + oc -n $NAMESPACE delete pod redis-redis-0 diff --git a/tests/kuttl/tests/redis/05-assert.yaml b/tests/kuttl/tests/redis/05-assert.yaml new file mode 100644 index 00000000..f1d584e7 --- /dev/null +++ b/tests/kuttl/tests/redis/05-assert.yaml @@ -0,0 +1,17 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: + - script: | + set -e + SENTINELDATA=$(oc rsh -n $NAMESPACE -c sentinel redis-redis-0 redis-cli -p 26379 info | grep master | tr ',' '\n') + # there should be 1 master + echo "$SENTINELDATA" | grep -w sentinel_masters:1 + # there should be 2 slaves + echo "$SENTINELDATA" | grep -w slaves=2 + # there should be 3 connected sentinels for quorum + echo "$SENTINELDATA" | grep -w sentinels=3 + # exactly one pod must be accessible from the redis service (jq -e exits non-zero otherwise) + oc -n $NAMESPACE get endpoints redis -o json | jq -e '.subsets[0].addresses | length == 1' + # the first pod should no longer be the master after the failover + echo "$SENTINELDATA" | grep address | grep -v redis-redis-0