Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rolling querynode 1 by 1 using 2 deployments #74

Merged
merged 28 commits into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ test: manifests generate fmt vet test-only ## Run tests.

code-check: go-generate fmt vet

test-only:
test-only: ## -race requires cgo
CGO_ENABLED=1 go test -race ./... -coverprofile tmp.out; cat tmp.out | sed '/zz_generated.deepcopy.go/d' | sed '/_mock.go/d' > cover.out

##@ Build
Expand Down Expand Up @@ -421,7 +421,7 @@ deploy-by-manifest: sit-prepare-operator-images sit-load-operator-images sit-gen

debug-start: dev-cert
kubectl -n milvus-operator patch deployment/milvus-operator --patch '{"spec":{"template":{"spec":{"containers":[{"name":"manager","args":["-namespace","milvus-operator","-name","milvus-operator","--health-probe-bind-address=:8081","--metrics-bind-address=:8080","--leader-elect","--stop-reconcilers=all"]}]}}}}'
go run ./main.go
go run ./main.go -debug

debug-stop:
kubectl -n milvus-operator patch deployment/milvus-operator --patch '{"spec":{"template":{"spec":{"containers":[{"name":"manager","args":["-namespace","milvus-operator","-name","milvus-operator","--health-probe-bind-address=:8081","--metrics-bind-address=:8080","--leader-elect"]}]}}}}'
111 changes: 111 additions & 0 deletions apis/milvus.io/v1beta1/label_annotations.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package v1beta1

import (
"strconv"

"sigs.k8s.io/controller-runtime/pkg/client"
)

// Label and annotation keys (plus a few well-known values) used by this package.
// Every key below is built from the MilvusIO prefix.
const (
// MilvusIO is the key prefix shared by all labels and annotations declared in this block.
MilvusIO = "milvus.io/"
// OperatorVersionLabel is the label key "milvus.io/operator-version".
OperatorVersionLabel = MilvusIO + "operator-version"
// DependencyValuesLegacySyncedAnnotation: legacy versions before v0.5.1 did not write default values to the CR.
// If such a CR is upgraded to v0.5.1+ and the dependency default values in milvus-helm have been updated,
// the inCluster dependencies would get restarted. So the defaults are synced first to prevent this.
DependencyValuesLegacySyncedAnnotation = MilvusIO + "dependency-values-legacy-synced"
// DependencyValuesMergedAnnotation is the annotation key "milvus.io/dependency-values-merged".
DependencyValuesMergedAnnotation = MilvusIO + "dependency-values-merged"
// LegacyVersion is a version string; NOTE(review): presumably used as the operator-version value for pre-v0.5.1 CRs — confirm against callers.
LegacyVersion = "v0.5.0-legacy"
// FalseStr and TrueStr are the string spellings of booleans; TrueStr is what
// annotation values are compared against in this package.
FalseStr = "false"
TrueStr = "true"
// UpgradeAnnotation is the annotation key "milvus.io/upgrade".
UpgradeAnnotation = MilvusIO + "upgrade"
// AnnotationUpgrading and AnnotationUpgraded are plain string values;
// NOTE(review): presumably the values stored under UpgradeAnnotation — confirm against callers.
AnnotationUpgrading = "upgrading"
AnnotationUpgraded = "upgraded"
// StoppedAtAnnotation is the annotation key "milvus.io/stopped-at".
StoppedAtAnnotation = MilvusIO + "stopped-at"

// PodServiceLabelAddedAnnotation indicates whether the milvus.io/service=true label has been added to proxy & standalone pods.
// Previously milvus.io/component: proxy / standalone was used to select the service pods,
// but to support updating a standalone to a cluster without downtime,
// milvus.io/service="true" is now used to select the service pods instead.
PodServiceLabelAddedAnnotation = MilvusIO + "pod-service-label-added"
// ServiceLabel is the label to indicate whether the pod is a service pod
ServiceLabel = MilvusIO + "service"

// Query node rolling related labels.
// MilvusIOLabelQueryNodeGroupId is the label key holding a query node group id.
MilvusIOLabelQueryNodeGroupId = MilvusIO + "querynode-group-id"
// MilvusIOLabelQueryNodeRolling is the label key holding the rolling id; a non-empty value means a rolling is in progress.
MilvusIOLabelQueryNodeRolling = MilvusIO + "querynode-rolling-id"
// Query node rolling related annotations.
// MilvusIOAnnotationCurrentQueryNodeGroupId is the annotation key holding the current query node group id.
MilvusIOAnnotationCurrentQueryNodeGroupId = MilvusIO + "current-querynode-group-id"
// MilvusIOAnnotationChangingQueryNodeMode is the annotation key set to TrueStr while the query node mode is changing.
MilvusIOAnnotationChangingQueryNodeMode = MilvusIO + "changing-querynode-mode"
)

// LabelsImpl groups the helper methods that read and write the milvus.io
// labels and annotations. It carries no state.
// +kubebuilder:object:generate=false
type LabelsImpl struct{}

// singletonLabels is the shared LabelsImpl instance handed out by Labels.
var singletonLabels = &LabelsImpl{}

// Labels returns the package-level LabelsImpl instance.
func Labels() *LabelsImpl {
return singletonLabels
}

// IsChangeQueryNodeMode reports whether m carries the
// changing-querynode-mode annotation with the value "true".
func (LabelsImpl) IsChangeQueryNodeMode(m Milvus) bool {
	// A missing key (or a nil annotation map) simply yields found == false.
	mode, found := m.Annotations[MilvusIOAnnotationChangingQueryNodeMode]
	return found && mode == TrueStr
}

// SetChangingQueryNodeMode marks or unmarks m as being in the middle of a
// query node mode change.
//
// When changing is true the changing-querynode-mode annotation is set to
// "true"; otherwise the annotation is removed. The annotation map is allocated
// on demand, so the call is safe on an object that has no annotations yet.
func (LabelsImpl) SetChangingQueryNodeMode(m *Milvus, changing bool) {
	if !changing {
		// delete is a no-op on a nil map, so no guard is needed here.
		delete(m.Annotations, MilvusIOAnnotationChangingQueryNodeMode)
		return
	}
	if m.Annotations == nil {
		// Assigning into a nil map panics; allocate it first.
		m.Annotations = make(map[string]string, 1)
	}
	m.Annotations[MilvusIOAnnotationChangingQueryNodeMode] = TrueStr
}

// GetLabelQueryNodeGroupID returns the value of the querynode-group-id label
// on obj, or the empty string when the label is absent.
func (LabelsImpl) GetLabelQueryNodeGroupID(obj client.Object) string {
	// Indexing a nil or empty label map reports ok == false, so objects
	// without labels fall through to the empty result.
	if groupID, ok := obj.GetLabels()[MilvusIOLabelQueryNodeGroupId]; ok {
		return groupID
	}
	return ""
}

// SetQueryNodeGroupID stores groupId, formatted as a decimal string, under the
// querynode-group-id key of labels. labels must be a non-nil map.
func (l LabelsImpl) SetQueryNodeGroupID(labels map[string]string, groupId int) {
	groupIDStr := strconv.Itoa(groupId)
	l.SetQueryNodeGroupIDStr(labels, groupIDStr)
}

// SetQueryNodeGroupIDStr stores groupIdStr under the querynode-group-id key of
// labels. labels must be a non-nil map: it is passed by value, so it cannot be
// allocated here on the caller's behalf.
func (LabelsImpl) SetQueryNodeGroupIDStr(labels map[string]string, groupIdStr string) {
	const key = MilvusIOLabelQueryNodeGroupId
	labels[key] = groupIdStr
}

// GetCurrentQueryNodeGroupId returns the value of the
// current-querynode-group-id annotation on m, or the empty string when the
// annotation is absent.
func (LabelsImpl) GetCurrentQueryNodeGroupId(m *Milvus) string {
	// Reading a missing key from a nil or empty map yields "", which is
	// exactly the "not set" result.
	return m.GetAnnotations()[MilvusIOAnnotationCurrentQueryNodeGroupId]
}

// SetCurrentQueryNodeGroupID records groupId, formatted as a decimal string,
// as the current query node group id of m.
func (l LabelsImpl) SetCurrentQueryNodeGroupID(m *Milvus, groupId int) {
	groupIDStr := strconv.Itoa(groupId)
	l.SetCurrentQueryNodeGroupIDStr(m, groupIDStr)
}

// SetCurrentQueryNodeGroupIDStr stores groupId under the
// current-querynode-group-id annotation of m.
//
// The annotation map is allocated on demand, so the call is safe on an object
// that has no annotations yet.
func (LabelsImpl) SetCurrentQueryNodeGroupIDStr(m *Milvus, groupId string) {
	if m.Annotations == nil {
		// Assigning into a nil map panics; allocate it first.
		m.Annotations = make(map[string]string, 1)
	}
	m.Annotations[MilvusIOAnnotationCurrentQueryNodeGroupId] = groupId
}

// IsQueryNodeRolling reports whether a query node rolling is in progress for
// m, i.e. whether the querynode-rolling-id label is present and non-empty.
func (LabelsImpl) IsQueryNodeRolling(m Milvus) bool {
	return m.Labels[MilvusIOLabelQueryNodeRolling] != ""
}

// GetQueryNodeRollingId returns the value of the querynode-rolling-id label on
// m, or the empty string when the label is absent.
func (LabelsImpl) GetQueryNodeRollingId(m Milvus) string {
	if rollingID, ok := m.Labels[MilvusIOLabelQueryNodeRolling]; ok {
		return rollingID
	}
	return ""
}

// SetQueryNodeRolling marks or unmarks m as having a query node rolling in
// progress.
//
// When rolling is true and no rolling id is recorded yet, the
// querynode-rolling-id label is set to m's current generation; an existing
// rolling id is left untouched. When rolling is false the label is removed.
// The label map is allocated on demand, so the call is safe on an object that
// has no labels yet.
func (LabelsImpl) SetQueryNodeRolling(m *Milvus, rolling bool) {
	if !rolling {
		// delete is a no-op on a nil map, so no guard is needed here.
		delete(m.Labels, MilvusIOLabelQueryNodeRolling)
		return
	}
	if len(m.Labels[MilvusIOLabelQueryNodeRolling]) > 0 {
		// A rolling is already recorded; keep its original id.
		return
	}
	if m.Labels == nil {
		// Assigning into a nil map panics; allocate it first.
		m.Labels = make(map[string]string, 1)
	}
	// Format the int64 generation directly instead of narrowing it to int,
	// which could truncate on 32-bit platforms.
	m.Labels[MilvusIOLabelQueryNodeRolling] = strconv.FormatInt(m.GetGeneration(), 10)
}
52 changes: 52 additions & 0 deletions apis/milvus.io/v1beta1/label_annotations_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package v1beta1

import (
"testing"

"github.com/stretchr/testify/assert"
)

func TestLabelsImpl_IsChangeQueryNodeMode_SetChangingQueryNodeMode(t *testing.T) {
mc := Milvus{}
mc.Default()
assert.False(t, Labels().IsChangeQueryNodeMode(mc))
Labels().SetChangingQueryNodeMode(&mc, true)
assert.True(t, Labels().IsChangeQueryNodeMode(mc))
Labels().SetChangingQueryNodeMode(&mc, false)
assert.False(t, Labels().IsChangeQueryNodeMode(mc))
}

func TestLabelsImpl_GetLabelQueryNodeGroupID_SetQueryNodeGroupID(t *testing.T) {
mc := Milvus{}
mc.Default()
assert.Equal(t, "", Labels().GetLabelQueryNodeGroupID(&mc))
Labels().SetQueryNodeGroupID(mc.Labels, 1)
assert.Equal(t, "1", Labels().GetLabelQueryNodeGroupID(&mc))
Labels().SetQueryNodeGroupID(mc.Labels, 0)
assert.Equal(t, "0", Labels().GetLabelQueryNodeGroupID(&mc))

}

func TestLabelsImpl_GetCurrentQueryNodeGroupId_SetCurrentQueryNodeGroupID(t *testing.T) {
mc := Milvus{}
mc.Default()
assert.Equal(t, "", Labels().GetCurrentQueryNodeGroupId(&mc))
Labels().SetCurrentQueryNodeGroupID(&mc, 1)
assert.Equal(t, "1", Labels().GetCurrentQueryNodeGroupId(&mc))
Labels().SetCurrentQueryNodeGroupID(&mc, 0)
assert.Equal(t, "0", Labels().GetCurrentQueryNodeGroupId(&mc))
}

func TestLabelsImpl_IsQueryNodeRolling_GetQueryNodeRollingId_SetQueryNodeRolling(t *testing.T) {
mc := Milvus{}
mc.Generation = 1
mc.Default()
assert.False(t, Labels().IsQueryNodeRolling(mc))
assert.Equal(t, "", Labels().GetQueryNodeRollingId(mc))
Labels().SetQueryNodeRolling(&mc, true)
assert.True(t, Labels().IsQueryNodeRolling(mc))
assert.Equal(t, "1", Labels().GetQueryNodeRollingId(mc))
Labels().SetQueryNodeRolling(&mc, false)
assert.False(t, Labels().IsQueryNodeRolling(mc))
assert.Equal(t, "", Labels().GetQueryNodeRollingId(mc))
}
20 changes: 20 additions & 0 deletions apis/milvus.io/v1beta1/milvus_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ type MilvusStatus struct {
// +optional
ComponentsDeployStatus map[string]ComponentDeployStatus `json:"componentsDeployStatus,omitempty"`

// RollingMode is the version of rolling mode the milvus CR is using
RollingMode RollingMode `json:"rollingModeVersion,omitempty"`

// ObservedGeneration has same usage as deployment.status.observedGeneration
// it represents the .metadata.generation that the condition was set based upon.
// For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
Expand All @@ -175,6 +178,18 @@ type MilvusStatus struct {
ObservedGeneration int64 `json:"observedGeneration,omitempty" protobuf:"varint,3,opt,name=observedGeneration"`
}

// RollingMode identifies which version of the rolling mode a Milvus CR is using.
// The rolling mode has changed several times, so this enum tracks the version.
type RollingMode int

// RollingMode definitions
const (
// RollingModeNotSet is the zero value, used when no rolling mode has been recorded.
RollingModeNotSet RollingMode = iota
// RollingModeV1 has 1 query node deployment and uses the k8s deployment's default rolling update strategy.
RollingModeV1
// RollingModeV2 has 2 query node deployments; the operator takes care of the querynode rolling update.
RollingModeV2
)

type ComponentDeployStatus struct {
// Generation of the deployment
Generation int64 `json:"generation"`
Expand Down Expand Up @@ -382,6 +397,8 @@ const (
ReasonSecretDecodeErr = "SecretDecodeError"
ReasonClientErr = "ClientError"
ReasonDependencyNotReady = "DependencyNotReady"

MsgMilvusHasTerminatingPods = "Milvus has terminating pods"
)

// +genclient
Expand Down Expand Up @@ -419,6 +436,9 @@ func (m *Milvus) IsChangingMode() bool {
}

// IsPodServiceLabelAdded reports whether the pod-service-label-added
// annotation on m is set to "true".
func (m *Milvus) IsPodServiceLabelAdded() bool {
	// A nil or empty annotation map reports found == false, which covers the
	// "no annotations" case without a separate length check.
	added, found := m.Annotations[PodServiceLabelAddedAnnotation]
	return found && added == TrueStr
}

Expand Down
25 changes: 0 additions & 25 deletions apis/milvus.io/v1beta1/milvus_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,31 +196,6 @@ func deleteUnsettableConf(conf map[string]interface{}) {
}
}

const (
MilvusIO = "milvus.io"
OperatorVersionLabel = MilvusIO + "/operator-version"
// DependencyValuesLegacySyncedAnnotation : For legacy versions before v0.5.1, default value is not set to CR.
// So if they upgrade to v0.5.1+, if the dependency default values in milvus-helm updated
// the inCluster dependencies will get restarted. So we sync defaults first to prevent this
DependencyValuesLegacySyncedAnnotation = MilvusIO + "/dependency-values-legacy-synced"
DependencyValuesMergedAnnotation = MilvusIO + "/dependency-values-merged"
LegacyVersion = "v0.5.0-legacy"
FalseStr = "false"
TrueStr = "true"
UpgradeAnnotation = MilvusIO + "/upgrade"
AnnotationUpgrading = "upgrading"
AnnotationUpgraded = "upgraded"
StoppedAtAnnotation = MilvusIO + "/stopped-at"

// PodServiceLabelAddedAnnotation is to indicate whether the milvus.io/service=true label is added to proxy & standalone pods
// previously, we use milvus.io/component: proxy / standalone; to select the service pods
// but now we want to support a standalone updating to cluster without downtime
// so instead we use milvus.io/service="true" to select the service pods
PodServiceLabelAddedAnnotation = MilvusIO + "/pod-service-label-added"
// ServiceLabel is the label to indicate whether the pod is a service pod
ServiceLabel = "milvus.io/service"
)

var (
Version = "unknown"
MilvusHelmVersion = "unknown"
Expand Down
71 changes: 2 additions & 69 deletions charts/milvus-operator/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,59 +9,7 @@ rules:
- apiGroups:
- ""
resources:
- configmaps
- secrets
- services
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
- deployments
- statefulsets
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
- persistentvolumeclaims
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
- pods
- pods/exec
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
- serviceaccounts
- '*'
verbs:
- create
- delete
Expand All @@ -73,22 +21,7 @@ rules:
- apiGroups:
- apps
resources:
- deployments
- statefulsets
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- apps
resources:
- pods
- secrets
- services
- '*'
verbs:
- create
- delete
Expand Down
6 changes: 6 additions & 0 deletions charts/milvus-operator/templates/crds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6297,6 +6297,8 @@ spec:
standalone:
type: integer
type: object
rollingModeVersion:
type: integer
status:
default: Pending
type: string
Expand Down Expand Up @@ -7253,6 +7255,8 @@ spec:
standalone:
type: integer
type: object
rollingModeVersion:
type: integer
status:
default: Pending
type: string
Expand Down Expand Up @@ -13567,6 +13571,8 @@ spec:
standalone:
type: integer
type: object
rollingModeVersion:
type: integer
status:
default: Pending
type: string
Expand Down
2 changes: 2 additions & 0 deletions config/crd/bases/milvus.io_milvusclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6296,6 +6296,8 @@ spec:
standalone:
type: integer
type: object
rollingModeVersion:
type: integer
status:
default: Pending
type: string
Expand Down
Loading
Loading