Skip to content

Commit

Permalink
feat: catch up with Kubeadm in terms of conditions
Browse files Browse the repository at this point in the history
This is the first set of supported conditions:
- `Resized`
- `ControlPlaneComponentsHealthy`
- `EtcdClusterHealthy`
- `Available`
- `MachinesReady`
- `MachinesCreated`

Signed-off-by: Artem Chernyshev <[email protected]>
  • Loading branch information
Unix4ever committed Oct 8, 2021
1 parent 43eb75b commit 1662815
Show file tree
Hide file tree
Showing 12 changed files with 544 additions and 153 deletions.
2 changes: 1 addition & 1 deletion .drone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ type: kubernetes

services:
- name: docker
image: docker:20.10-dind
image: ghcr.io/smira/docker:20.10-dind-hacked
entrypoint: [dockerd]
privileged: true
volumes:
Expand Down
77 changes: 77 additions & 0 deletions api/v1alpha3/conditions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

// +kubebuilder:object:generate=true
// +groupName=controlplane.cluster.x-k8s.io
package v1alpha3

import clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4"

// Conditions and condition Reasons for the TalosControlPlane object

// Condition describing the aggregated state of the control plane machines.
// No Reason constants are declared alongside it in this group.
const (
	// MachinesReadyCondition reports an aggregate of the current status of the machines controlled by the TalosControlPlane.
	MachinesReadyCondition clusterv1.ConditionType = "MachinesReady"
)

// Available condition and the Reason used with it.
//
// Only the condition carries the explicit clusterv1.ConditionType type; the
// Reason below is declared without a type and is therefore an untyped string
// constant.
const (
	// AvailableCondition documents that the first control plane instance has completed the Talos boot sequence
	// and so the control plane is available and an API server instance is ready for processing requests.
	AvailableCondition clusterv1.ConditionType = "Available"

	// WaitingForTalosBootReason (Severity=Info) documents a TalosControlPlane object waiting for the first
	// control plane instance to complete the Talos boot sequence.
	WaitingForTalosBootReason = "WaitingForTalosBoot"
)

// Resized condition and the Reasons used with it.
//
// Only the condition carries the explicit clusterv1.ConditionType type; the
// Reasons below are untyped string constants.
const (
	// ResizedCondition documents a TalosControlPlane that is resizing the set of controlled machines.
	ResizedCondition clusterv1.ConditionType = "Resized"

	// ScalingUpReason (Severity=Info) documents a TalosControlPlane that is increasing the number of replicas.
	ScalingUpReason = "ScalingUp"

	// ScalingDownReason (Severity=Info) documents a TalosControlPlane that is decreasing the number of replicas.
	ScalingDownReason = "ScalingDown"
)

// ControlPlaneComponentsHealthy condition and the Reasons used with it.
//
// Only the condition carries the explicit clusterv1.ConditionType type; the
// Reasons below are untyped string constants.
const (
	// ControlPlaneComponentsHealthyCondition reports the overall status of control plane components
	// implemented as static pods generated by Talos, including kube-apiserver, kube-controller-manager,
	// kube-scheduler and etcd.
	ControlPlaneComponentsHealthyCondition clusterv1.ConditionType = "ControlPlaneComponentsHealthy"

	// ControlPlaneComponentsUnhealthyReason (Severity=Error) documents a control plane component not healthy.
	ControlPlaneComponentsUnhealthyReason = "ControlPlaneComponentsUnhealthy"

	// ControlPlaneComponentsInspectionFailedReason documents a failure in inspecting the control plane component status.
	ControlPlaneComponentsInspectionFailedReason = "ControlPlaneComponentsInspectionFailed"
)

// EtcdClusterHealthy condition and the Reason used with it.
//
// Only the condition carries the explicit clusterv1.ConditionType type; the
// Reason below is an untyped string constant.
const (
	// EtcdClusterHealthyCondition documents the overall etcd cluster's health.
	//
	// NOTE(review): unlike the other condition types declared in this file, the
	// string value here ends in "Condition". Confirm whether that is intentional
	// (e.g. kept for parity with another provider) before changing it, since the
	// value is presumably what consumers of the object's conditions see.
	EtcdClusterHealthyCondition clusterv1.ConditionType = "EtcdClusterHealthyCondition"

	// EtcdClusterUnhealthyReason (Severity=Error) is set when the etcd cluster is unhealthy.
	EtcdClusterUnhealthyReason = "EtcdClusterUnhealthy"
)

// MachinesCreated condition and the Reasons used with it.
//
// Only the condition carries the explicit clusterv1.ConditionType type; the
// Reasons below are untyped string constants. Each Reason corresponds to one of
// the failure causes listed in the condition's description.
const (
	// MachinesCreatedCondition documents that the machines controlled by the TalosControlPlane are created.
	// When this condition is false, it indicates that there was an error when cloning the infrastructure/bootstrap template or
	// when generating the machine object.
	MachinesCreatedCondition clusterv1.ConditionType = "MachinesCreated"

	// InfrastructureTemplateCloningFailedReason (Severity=Error) documents a TalosControlPlane failing to
	// clone the infrastructure template.
	InfrastructureTemplateCloningFailedReason = "InfrastructureTemplateCloningFailed"

	// BootstrapTemplateCloningFailedReason (Severity=Error) documents a TalosControlPlane failing to
	// clone the bootstrap template.
	BootstrapTemplateCloningFailedReason = "BootstrapTemplateCloningFailed"

	// MachineGenerationFailedReason (Severity=Error) documents a TalosControlPlane failing to
	// generate a machine object.
	MachineGenerationFailedReason = "MachineGenerationFailed"
)
19 changes: 19 additions & 0 deletions api/v1alpha3/taloscontrolplane_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
cabptv1 "github.com/talos-systems/cluster-api-bootstrap-provider-talos/api/v1alpha3"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4"
)

const (
Expand Down Expand Up @@ -88,6 +89,14 @@ type TalosControlPlaneStatus struct {
// state, and will be set to a descriptive error message.
// +optional
FailureMessage *string `json:"failureMessage,omitempty"`

// ObservedGeneration is the latest generation observed by the controller.
// +optional
ObservedGeneration int64 `json:"observedGeneration,omitempty"`

// Conditions defines current service state of the TalosControlPlane.
// +optional
Conditions clusterv1.Conditions `json:"conditions,omitempty"`
}

// +kubebuilder:object:root=true
Expand All @@ -110,6 +119,16 @@ type TalosControlPlane struct {
Status TalosControlPlaneStatus `json:"status,omitempty"`
}

// GetConditions returns the set of conditions for this object.
//
// It returns the Conditions field of the object's Status as stored.
// NOTE(review): presumably this, together with SetConditions, lets the type be
// used with the cluster-api conditions helpers — confirm against that package.
func (in *TalosControlPlane) GetConditions() clusterv1.Conditions {
	return in.Status.Conditions
}

// SetConditions sets the conditions on this object.
//
// It overwrites the Conditions field of the object's Status with the given
// value; the previous contents are replaced, not merged.
func (in *TalosControlPlane) SetConditions(conditions clusterv1.Conditions) {
	in.Status.Conditions = conditions
}

// +kubebuilder:object:root=true

// TalosControlPlaneList contains a list of TalosControlPlane
Expand Down
8 changes: 8 additions & 0 deletions api/v1alpha3/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,35 @@ spec:
status:
description: TalosControlPlaneStatus defines the observed state of TalosControlPlane
properties:
conditions:
description: Conditions defines current service state of the TalosControlPlane.
items:
description: Condition defines an observation of a Cluster API resource operational state.
properties:
lastTransitionTime:
description: Last time the condition transitioned from one status to another. This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
format: date-time
type: string
message:
description: A human readable message indicating details about the transition. This field may be empty.
type: string
reason:
description: The reason for the condition's last transition in CamelCase. The specific API may choose whether or not this field is considered a guaranteed API. This field may not be empty.
type: string
severity:
description: Severity provides an explicit classification of Reason code, so the users or machines can immediately understand the current situation and act accordingly. The Severity field MUST be set only when Status=False.
type: string
status:
description: Status of the condition, one of True, False, Unknown.
type: string
type:
description: Type of condition in CamelCase or in foo.example.com/CamelCase. Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be useful (see .node.status.conditions), the ability to deconflict is important.
type: string
required:
- status
- type
type: object
type: array
failureMessage:
description: ErrorMessage indicates that there is a terminal problem reconciling the state, and will be set to a descriptive error message.
type: string
Expand All @@ -167,6 +196,10 @@ spec:
initialized:
description: Initialized denotes whether or not the control plane has the uploaded talos-config configmap.
type: boolean
observedGeneration:
description: ObservedGeneration is the latest generation observed by the controller.
format: int64
type: integer
ready:
description: Ready denotes that the TalosControlPlane API Server is ready to receive requests.
type: boolean
Expand Down
30 changes: 28 additions & 2 deletions controllers/configs.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ package controllers
import (
"context"
"fmt"
"net"
"time"

cabptv1 "github.com/talos-systems/cluster-api-bootstrap-provider-talos/api/v1alpha3"
talosclient "github.com/talos-systems/talos/pkg/machinery/client"
Expand All @@ -16,13 +18,31 @@ import (
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/client-go/util/connrotation"
capiv1 "sigs.k8s.io/cluster-api/api/v1alpha4"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// kubernetesClient pairs a Kubernetes clientset with a connrotation dialer so
// that the connections tracked by that dialer can be closed through Close once
// the client is no longer needed.
type kubernetesClient struct {
	// Clientset is embedded, so its methods are promoted onto kubernetesClient.
	*kubernetes.Clientset

	// dialer is the dialer whose CloseAll is invoked by Close.
	dialer *connrotation.Dialer
}

// Close closes all connections tracked by the client's dialer.
//
// It is safe to call on a nil receiver or on a client that has no dialer set;
// in both cases it does nothing. The returned error is always nil.
func (k *kubernetesClient) Close() error {
	// Guard against a nil receiver or zero-value client so that a deferred
	// Close can never panic.
	if k == nil || k.dialer == nil {
		return nil
	}

	k.dialer.CloseAll()

	return nil
}

// newDialer returns a connrotation dialer that wraps the DialContext method of
// a standard net.Dialer configured with a 30-second connect timeout and a
// 30-second keep-alive period.
func newDialer() *connrotation.Dialer {
	base := &net.Dialer{
		Timeout:   30 * time.Second,
		KeepAlive: 30 * time.Second,
	}

	return connrotation.NewDialer(base.DialContext)
}

// kubeconfigForCluster will fetch a kubeconfig secret based on cluster name/namespace,
// use it to create a clientset, and return it.
func (r *TalosControlPlaneReconciler) kubeconfigForCluster(ctx context.Context, cluster client.ObjectKey) (*kubernetes.Clientset, error) {
func (r *TalosControlPlaneReconciler) kubeconfigForCluster(ctx context.Context, cluster client.ObjectKey) (*kubernetesClient, error) {
kubeconfigSecret := &corev1.Secret{}

err := r.Client.Get(ctx,
Expand All @@ -41,12 +61,18 @@ func (r *TalosControlPlaneReconciler) kubeconfigForCluster(ctx context.Context,
return nil, err
}

dialer := newDialer()
config.Dial = dialer.DialContext

clientset, err := kubernetes.NewForConfig(config)
if err != nil {
return nil, err
}

return clientset, nil
return &kubernetesClient{
Clientset: clientset,
dialer: dialer,
}, nil
}

// talosconfigForMachine will generate a talosconfig that uses *all* found addresses as the endpoints.
Expand Down
16 changes: 12 additions & 4 deletions controllers/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@ import (
)

func (r *TalosControlPlaneReconciler) etcdHealthcheck(ctx context.Context, cluster *capiv1.Cluster, ownedMachines []capiv1.Machine) error {
clientset, err := r.kubeconfigForCluster(ctx, util.ObjectKey(cluster))
kubeclient, err := r.kubeconfigForCluster(ctx, util.ObjectKey(cluster))
if err != nil {
return err
}

defer kubeclient.Close() //nolint:errcheck

machines := []capiv1.Machine{}

for _, machine := range ownedMachines {
Expand All @@ -30,11 +32,13 @@ func (r *TalosControlPlaneReconciler) etcdHealthcheck(ctx context.Context, clust
}
}

c, err := r.talosconfigForMachines(ctx, clientset, machines...)
c, err := r.talosconfigForMachines(ctx, kubeclient.Clientset, machines...)
if err != nil {
return err
}

defer c.Close() //nolint:errcheck

service := "etcd"

params := make([]interface{}, 0, len(machines)*2)
Expand Down Expand Up @@ -178,16 +182,20 @@ func (r *TalosControlPlaneReconciler) auditEtcd(ctx context.Context, cluster cli
return fmt.Errorf("no CP machine which is not being deleted and has node ref")
}

clientset, err := r.kubeconfigForCluster(ctx, cluster)
kubeclient, err := r.kubeconfigForCluster(ctx, cluster)
if err != nil {
return err
}

c, err := r.talosconfigForMachines(ctx, clientset, designatedCPMachine)
defer kubeclient.Close() //nolint:errcheck

c, err := r.talosconfigForMachines(ctx, kubeclient.Clientset, designatedCPMachine)
if err != nil {
return err
}

defer c.Close() //nolint:errcheck

response, err := c.EtcdMemberList(ctx, &machine.EtcdMemberListRequest{})
if err != nil {
return fmt.Errorf("error getting etcd members via %q (endpoints %v): %w", designatedCPMachine.Name, c.GetConfigContext().Endpoints, err)
Expand Down
Loading

0 comments on commit 1662815

Please sign in to comment.