From ea07d8e3b7032309a80bdbb1a181937a8eb3fd8a Mon Sep 17 00:00:00 2001 From: mansikulkarni96 Date: Wed, 27 Aug 2025 14:37:30 -0400 Subject: [PATCH] Implement certificate-based RBAC for WICD **Certificate-Based Authentication:** - Add certificate-based ClusterRole (system-wicd-nodes) - Add ClusterRoleBinding that grants permissions to system:wicd-nodes group - Certificate identity format: system:wicd-node:nodename **RBAC Permissions:** - Bootstrap ServiceAccount: Minimal permissions for CSR creation and ConfigMap access - Certificate-based users: Enhanced permissions for node patching **Security Model:** 1. ServiceAccount (bootstrap): Limited permissions during initial setup 2. CSR Controller: Validates certificate requests from nodes 3. Node-specific access enforced in a follow-up PR by webhook. --- ...c.authorization.k8s.io_v1_clusterrole.yaml | 42 + ...rization.k8s.io_v1_clusterrolebinding.yaml | 16 + ...nstance-config-daemon-service-account.yaml | 4 + ...c.authorization.k8s.io_v1_clusterrole.yaml | 23 +- ...config-operator.clusterserviceversion.yaml | 9 + cmd/daemon/controller.go | 16 +- cmd/operator/main.go | 10 + config/rbac/kustomization.yaml | 2 + config/rbac/role.yaml | 9 + .../wicd-certificate-group-clusterrole.yaml | 29 + ...-certificate-group-clusterrolebinding.yaml | 15 + ...s-instance-config-daemon-cluster-role.yaml | 24 +- controllers/configmap_controller.go | 227 +++-- controllers/wicd_csr_controller.go | 199 +++++ hack/run-ci-e2e-test.sh | 2 + pkg/csr/csr.go | 226 ++--- pkg/csr/csr_test.go | 3 +- pkg/csr/validation/types.go | 48 ++ pkg/csr/validation/validation.go | 251 ++++++ pkg/daemon/certs/wicd_cert_manager.go | 144 ++++ pkg/daemon/controller/controller.go | 36 +- pkg/nodeconfig/nodeconfig.go | 80 +- pkg/rbac/rbac.go | 9 + pkg/windows/windows.go | 16 +- test/e2e/create_test.go | 1 + test/e2e/validation_test.go | 6 +- test/e2e/wicd_rbac_test.go | 126 +++ .../client-go/tools/watch/informerwatcher.go | 166 ++++ 
.../client-go/tools/watch/retrywatcher.go | 327 +++++++ vendor/k8s.io/client-go/tools/watch/until.go | 168 ++++ .../k8s.io/client-go/util/certificate/OWNERS | 8 + .../util/certificate/certificate_manager.go | 809 ++++++++++++++++++ .../util/certificate/certificate_store.go | 335 ++++++++ .../client-go/util/certificate/csr/csr.go | 377 ++++++++ vendor/modules.txt | 3 + 35 files changed, 3476 insertions(+), 290 deletions(-) create mode 100644 bundle/manifests/system-wicd-nodes_rbac.authorization.k8s.io_v1_clusterrole.yaml create mode 100644 bundle/manifests/system-wicd-nodes_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml create mode 100644 bundle/manifests/windows-instance-config-daemon-service-account.yaml create mode 100644 config/rbac/wicd-certificate-group-clusterrole.yaml create mode 100644 config/rbac/wicd-certificate-group-clusterrolebinding.yaml create mode 100644 controllers/wicd_csr_controller.go create mode 100644 pkg/csr/validation/types.go create mode 100644 pkg/csr/validation/validation.go create mode 100644 pkg/daemon/certs/wicd_cert_manager.go create mode 100644 pkg/rbac/rbac.go create mode 100644 test/e2e/wicd_rbac_test.go create mode 100644 vendor/k8s.io/client-go/tools/watch/informerwatcher.go create mode 100644 vendor/k8s.io/client-go/tools/watch/retrywatcher.go create mode 100644 vendor/k8s.io/client-go/tools/watch/until.go create mode 100644 vendor/k8s.io/client-go/util/certificate/OWNERS create mode 100644 vendor/k8s.io/client-go/util/certificate/certificate_manager.go create mode 100644 vendor/k8s.io/client-go/util/certificate/certificate_store.go create mode 100644 vendor/k8s.io/client-go/util/certificate/csr/csr.go diff --git a/bundle/manifests/system-wicd-nodes_rbac.authorization.k8s.io_v1_clusterrole.yaml b/bundle/manifests/system-wicd-nodes_rbac.authorization.k8s.io_v1_clusterrole.yaml new file mode 100644 index 0000000000..fcd3434608 --- /dev/null +++ 
b/bundle/manifests/system-wicd-nodes_rbac.authorization.k8s.io_v1_clusterrole.yaml @@ -0,0 +1,42 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/name: windows-machine-config-operator + app.kubernetes.io/part-of: wicd + name: system-wicd-nodes +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - nodes + verbs: + - list +- apiGroups: + - "" + resources: + - nodes + - nodes/status + verbs: + - get + - patch + - update + - watch +- apiGroups: + - certificates.k8s.io + resources: + - certificatesigningrequests + verbs: + - create + - get + - list + - watch diff --git a/bundle/manifests/system-wicd-nodes_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml b/bundle/manifests/system-wicd-nodes_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml new file mode 100644 index 0000000000..d951181f76 --- /dev/null +++ b/bundle/manifests/system-wicd-nodes_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml @@ -0,0 +1,16 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/name: windows-machine-config-operator + app.kubernetes.io/part-of: wicd + name: system-wicd-nodes +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system-wicd-nodes +subjects: +- apiGroup: rbac.authorization.k8s.io + kind: Group + name: system:wicd-nodes diff --git a/bundle/manifests/windows-instance-config-daemon-service-account.yaml b/bundle/manifests/windows-instance-config-daemon-service-account.yaml new file mode 100644 index 0000000000..008fd79d7c --- /dev/null +++ b/bundle/manifests/windows-instance-config-daemon-service-account.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: windows-instance-config-daemon diff --git 
a/bundle/manifests/windows-instance-config-daemon_rbac.authorization.k8s.io_v1_clusterrole.yaml b/bundle/manifests/windows-instance-config-daemon_rbac.authorization.k8s.io_v1_clusterrole.yaml index ac10aeb0e9..4d5292909f 100644 --- a/bundle/manifests/windows-instance-config-daemon_rbac.authorization.k8s.io_v1_clusterrole.yaml +++ b/bundle/manifests/windows-instance-config-daemon_rbac.authorization.k8s.io_v1_clusterrole.yaml @@ -2,22 +2,33 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: creationTimestamp: null + labels: + app.kubernetes.io/name: windows-machine-config-operator + app.kubernetes.io/part-of: wicd name: windows-instance-config-daemon rules: - apiGroups: - "" resources: - - nodes + - configmaps verbs: - - list - - watch - get - - patch - - update + - list - apiGroups: - "" resources: - - nodes/status + - nodes verbs: + - get + - list - patch - update +- apiGroups: + - certificates.k8s.io + resources: + - certificatesigningrequests + verbs: + - create + - get + - list + - watch diff --git a/bundle/manifests/windows-machine-config-operator.clusterserviceversion.yaml b/bundle/manifests/windows-machine-config-operator.clusterserviceversion.yaml index 44113c9d2d..f2449b0618 100644 --- a/bundle/manifests/windows-machine-config-operator.clusterserviceversion.yaml +++ b/bundle/manifests/windows-machine-config-operator.clusterserviceversion.yaml @@ -265,6 +265,7 @@ spec: resources: - certificatesigningrequests verbs: + - create - get - list - watch @@ -288,6 +289,14 @@ spec: - get - patch - update + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kube-apiserver-client + resources: + - signers + verbs: + - approve - apiGroups: - certificates.k8s.io resourceNames: diff --git a/cmd/daemon/controller.go b/cmd/daemon/controller.go index 469bf7b209..3e669a1fdc 100644 --- a/cmd/daemon/controller.go +++ b/cmd/daemon/controller.go @@ -21,6 +21,7 @@ package main import ( "flag" "os" + "time" "github.com/spf13/cobra" 
"k8s.io/klog/v2" @@ -40,6 +41,9 @@ var ( windowsService bool logDir string caBundle string + // Certificate-based authentication options + certDir string + certDuration string ) func init() { @@ -50,6 +54,10 @@ func init() { "Enables running as a Windows service") controllerCmd.PersistentFlags().StringVar(&caBundle, "ca-bundle", "", "the full path to CA bundle file containing certificates trusted by the cluster") + controllerCmd.PersistentFlags().StringVar(&certDir, "cert-dir", "C:\\k\\wicd-certs", + "Directory to store WICD client certificates") + controllerCmd.PersistentFlags().StringVar(&certDuration, "cert-duration", "1h", + "Duration for WICD certificates (e.g., 10m, 1h, 24h)") } func runControllerCmd(cmd *cobra.Command, args []string) { @@ -60,6 +68,12 @@ func runControllerCmd(cmd *cobra.Command, args []string) { fs.Set("logtostderr", "false") fs.Set("log_dir", logDir) } + duration, err := time.ParseDuration(certDuration) + if err != nil { + klog.Errorf("invalid cert-duration %s: %v", certDuration, err) + os.Exit(1) + } + ctx := ctrl.SetupSignalHandler() if windowsService { if err := initService(ctx); err != nil { @@ -68,7 +82,7 @@ func runControllerCmd(cmd *cobra.Command, args []string) { } } klog.Info("service controller running") - if err := controller.RunController(ctx, namespace, kubeconfig, caBundle); err != nil { + if err := controller.RunController(ctx, namespace, kubeconfig, caBundle, certDir, duration); err != nil { klog.Error(err) os.Exit(1) } diff --git a/cmd/operator/main.go b/cmd/operator/main.go index c8a12bd3b1..bab8a77228 100644 --- a/cmd/operator/main.go +++ b/cmd/operator/main.go @@ -266,6 +266,16 @@ func main() { os.Exit(1) } + wicdCSRController, err := controllers.NewWICDCSRController(mgr, watchNamespace) + if err != nil { + setupLog.Error(err, "unable to create WICD CSR controller") + os.Exit(1) + } + if err = wicdCSRController.SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", 
"WICD-CSR") + os.Exit(1) + } + mcReconciler, err := controllers.NewControllerConfigReconciler(mgr, clusterConfig, watchNamespace) if err != nil { setupLog.Error(err, "unable to create ControllerConfig reconciler") diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml index fa48252a01..7dcb112743 100644 --- a/config/rbac/kustomization.yaml +++ b/config/rbac/kustomization.yaml @@ -7,6 +7,8 @@ resources: # subjects if changing service account names. - role.yaml - role_binding.yaml +- wicd-certificate-group-clusterrole.yaml +- wicd-certificate-group-clusterrolebinding.yaml - leader_election_role.yaml - leader_election_role_binding.yaml # Comment the following 4 lines if you want to disable diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index b5564cf5ae..96b2eabca1 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -107,6 +107,7 @@ rules: resources: - certificatesigningrequests verbs: + - create - get - list - watch @@ -130,6 +131,14 @@ rules: - get - patch - update +- apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kube-apiserver-client + resources: + - signers + verbs: + - approve - apiGroups: - certificates.k8s.io resourceNames: diff --git a/config/rbac/wicd-certificate-group-clusterrole.yaml b/config/rbac/wicd-certificate-group-clusterrole.yaml new file mode 100644 index 0000000000..82843437b6 --- /dev/null +++ b/config/rbac/wicd-certificate-group-clusterrole.yaml @@ -0,0 +1,29 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: system-wicd-nodes + labels: + app.kubernetes.io/name: "windows-machine-config-operator" + app.kubernetes.io/part-of: "wicd" +rules: + # Allow reading ConfigMaps for bootstrap phase and cleanup + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "list"] + # Allow listing nodes for node discovery (no resourceNames restriction needed) + - apiGroups: [""] + resources: ["nodes"] + verbs: ["list"] + # WICD certificate-based approach: 
broader access than OVN due to Windows management needs + # Current implementation (Phase 1): Certificate authentication + group RBAC + # - CSR controller ensures only legitimate nodes get certificates + # - Certificate provides node-specific identity (system:wicd-node:nodename) + # - Group RBAC grants necessary permissions for Windows node configuration + # Future enhancement (Phase 2): Add admission webhook for operation-specific validation + - apiGroups: [""] + resources: ["nodes", "nodes/status"] + verbs: ["get", "patch", "update", "watch"] + # Allow creating CSRs for certificate renewal + - apiGroups: ["certificates.k8s.io"] + resources: ["certificatesigningrequests"] + verbs: ["create", "get", "list", "watch"] diff --git a/config/rbac/wicd-certificate-group-clusterrolebinding.yaml b/config/rbac/wicd-certificate-group-clusterrolebinding.yaml new file mode 100644 index 0000000000..65a9abdcc9 --- /dev/null +++ b/config/rbac/wicd-certificate-group-clusterrolebinding.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: system-wicd-nodes + labels: + app.kubernetes.io/name: "windows-machine-config-operator" + app.kubernetes.io/part-of: "wicd" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system-wicd-nodes +subjects: + - kind: Group + name: system:wicd-nodes + apiGroup: rbac.authorization.k8s.io diff --git a/config/wicd/windows-instance-config-daemon-cluster-role.yaml b/config/wicd/windows-instance-config-daemon-cluster-role.yaml index 55ecd66b59..d9618f5381 100644 --- a/config/wicd/windows-instance-config-daemon-cluster-role.yaml +++ b/config/wicd/windows-instance-config-daemon-cluster-role.yaml @@ -2,21 +2,33 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: windows-instance-config-daemon + labels: + app.kubernetes.io/name: "windows-machine-config-operator" + app.kubernetes.io/part-of: "wicd" rules: + # Bootstrap permissions - minimal access for initial 
setup ONLY - apiGroups: - "" resources: - - nodes + - configmaps verbs: - - list - - watch - get - - patch - - update + - list - apiGroups: - "" resources: - - nodes/status + - nodes verbs: + - get + - list - patch - update + - apiGroups: + - certificates.k8s.io + resources: + - certificatesigningrequests + verbs: + - create + - get + - list + - watch diff --git a/controllers/configmap_controller.go b/controllers/configmap_controller.go index 3803068065..cdda7db28f 100644 --- a/controllers/configmap_controller.go +++ b/controllers/configmap_controller.go @@ -26,7 +26,6 @@ import ( "reflect" "strings" - config "github.com/openshift/api/config/v1" oconfig "github.com/openshift/api/config/v1" mcfgv1 "github.com/openshift/api/machineconfiguration/v1" core "k8s.io/api/core/v1" @@ -76,10 +75,10 @@ const ( UsernameAnnotation = "windowsmachineconfig.openshift.io/username" // ConfigMapController is the name of this controller in logs and other outputs. ConfigMapController = "configmap" - // wicdRBACResourceName is the name of the resources associated with WICD's RBAC permissions - wicdRBACResourceName = "windows-instance-config-daemon" // InjectionRequestLabel is used to allow CNO to inject the trusted CA bundle when the global Proxy resource changes InjectionRequestLabel = "config.openshift.io/inject-trusted-cabundle" + // wicdRBACResourceName is the name used for WICD RBAC resources + wicdRBACResourceName = "windows-instance-config-daemon" ) // ConfigMapReconciler reconciles a ConfigMap object @@ -316,7 +315,7 @@ func (r *ConfigMapReconciler) ensureInstancesAreUpToDate(ctx context.Context, in for _, instanceInfo := range instances { // When platform type is none or Nutanix, kubelet will pick a random interface to use for the Node's IP. In that case we // should override that with the IP that the user is providing via the ConfigMap. 
- instanceInfo.SetNodeIP = r.platform == config.NonePlatformType || r.platform == config.NutanixPlatformType + instanceInfo.SetNodeIP = r.platform == oconfig.NonePlatformType || r.platform == oconfig.NutanixPlatformType encryptedUsername, err := crypto.EncryptToJSONString(instanceInfo.Username, privateKeyBytes) if err != nil { return fmt.Errorf("unable to encrypt username for instance %s: %w", instanceInfo.Address, err) @@ -400,6 +399,116 @@ func (r *ConfigMapReconciler) mapToServicesConfigMap(_ context.Context, _ client }} } +// EnsureWICDRBAC ensures the WICD RBAC resources exist as expected +func (r *ConfigMapReconciler) EnsureWICDRBAC(ctx context.Context) error { + if err := r.ensureWICDRoleBinding(ctx); err != nil { + return err + } + return r.ensureWICDClusterRoleBinding(ctx) +} + +// ensureWICDRoleBinding ensures the WICD RoleBinding resource exists as expected. +// Creates it if it doesn't exist, deletes and re-creates it if it exists with improper spec. +func (r *ConfigMapReconciler) ensureWICDRoleBinding(ctx context.Context) error { + existingRB, err := r.k8sclientset.RbacV1().RoleBindings(r.watchNamespace).Get(ctx, wicdRBACResourceName, + meta.GetOptions{}) + if err != nil && !k8sapierrors.IsNotFound(err) { + return fmt.Errorf("unable to get RoleBinding %s/%s: %w", r.watchNamespace, wicdRBACResourceName, err) + } + + expectedRB := &rbac.RoleBinding{ + ObjectMeta: meta.ObjectMeta{ + Name: wicdRBACResourceName, + }, + RoleRef: rbac.RoleRef{ + APIGroup: rbac.GroupName, + Kind: "Role", + Name: wicdRBACResourceName, + }, + Subjects: []rbac.Subject{{ + Kind: rbac.ServiceAccountKind, + Name: wicdRBACResourceName, + Namespace: r.watchNamespace, + }}, + } + if err == nil { + // check if existing RoleBinding's contents are as expected, delete it if not + if existingRB.RoleRef.Name == expectedRB.RoleRef.Name && + reflect.DeepEqual(existingRB.Subjects, expectedRB.Subjects) { + return nil + } + err = r.k8sclientset.RbacV1().RoleBindings(r.watchNamespace).Delete(ctx, 
wicdRBACResourceName, + meta.DeleteOptions{}) + if err != nil { + return fmt.Errorf("unable to delete RoleBinding %s/%s: %w", r.watchNamespace, wicdRBACResourceName, err) + } + r.log.Info("Deleted malformed resource", "RoleBinding", + kubeTypes.NamespacedName{Namespace: existingRB.Namespace, Name: existingRB.Name}, + "RoleRef", existingRB.RoleRef.Name, "Subjects", existingRB.Subjects) + } + // create proper resource if it does not exist + createdRB, err := r.k8sclientset.RbacV1().RoleBindings(r.watchNamespace).Create(ctx, expectedRB, + meta.CreateOptions{}) + if err != nil { + return fmt.Errorf("unable to create RoleBinding %s/%s: %w", r.watchNamespace, wicdRBACResourceName, err) + } + r.log.Info("Created resource", "RoleBinding", + kubeTypes.NamespacedName{Namespace: createdRB.Namespace, Name: createdRB.Name}, + "RoleRef", createdRB.RoleRef.Name, "Subjects", createdRB.Subjects) + + return nil +} + +// ensureWICDClusterRoleBinding ensures the WICD ClusterRoleBinding resource exists as expected. +// Creates it if it doesn't exist, deletes and re-creates it if it exists with improper spec. 
+func (r *ConfigMapReconciler) ensureWICDClusterRoleBinding(ctx context.Context) error { + existingCRB, err := r.k8sclientset.RbacV1().ClusterRoleBindings().Get(ctx, wicdRBACResourceName, + meta.GetOptions{}) + if err != nil && !k8sapierrors.IsNotFound(err) { + return err + } + + expectedCRB := &rbac.ClusterRoleBinding{ + ObjectMeta: meta.ObjectMeta{ + Name: wicdRBACResourceName, + }, + RoleRef: rbac.RoleRef{ + APIGroup: rbac.GroupName, + Kind: "ClusterRole", + Name: wicdRBACResourceName, + }, + Subjects: []rbac.Subject{{ + Kind: rbac.ServiceAccountKind, + Name: wicdRBACResourceName, + Namespace: r.watchNamespace, + }}, + } + if err == nil { + // check if existing ClusterRoleBinding's contents are as expected, delete it if not + if existingCRB.RoleRef.Name == expectedCRB.RoleRef.Name && + reflect.DeepEqual(existingCRB.Subjects, expectedCRB.Subjects) { + return nil + } + err = r.k8sclientset.RbacV1().ClusterRoleBindings().Delete(ctx, wicdRBACResourceName, + meta.DeleteOptions{}) + if err != nil { + return err + } + r.log.Info("Deleted malformed resource", "ClusterRoleBinding", existingCRB.Name, + "RoleRef", existingCRB.RoleRef.Name, "Subjects", existingCRB.Subjects) + + r.log.Info("Process will restart to reconcile resources") + defer os.Exit(1) + } + // create proper resource if it does not exist + createdCRB, err := r.k8sclientset.RbacV1().ClusterRoleBindings().Create(ctx, expectedCRB, meta.CreateOptions{}) + if err == nil { + r.log.Info("Created resource", "ClusterRoleBinding", createdCRB.Name, + "RoleRef", createdCRB.RoleRef.Name, "Subjects", createdCRB.Subjects) + } + return err +} + // SetupWithManager sets up the controller with the Manager. 
func (r *ConfigMapReconciler) SetupWithManager(mgr ctrl.Manager) error { configMapPredicate := predicate.Funcs{ @@ -601,116 +710,6 @@ func (r *ConfigMapReconciler) EnsureTrustedCAConfigMapExists(ctx context.Context return r.ensureProxyCertsCMIsValid(ctx, trustedCA.GetLabels()[InjectionRequestLabel]) } -// EnsureWICDRBAC ensures the WICD RBAC resources exist as expected -func (r *ConfigMapReconciler) EnsureWICDRBAC(ctx context.Context) error { - if err := r.ensureWICDRoleBinding(ctx); err != nil { - return err - } - return r.ensureWICDClusterRoleBinding(ctx) -} - -// ensureWICDRoleBinding ensures the WICD RoleBinding resource exists as expected. -// Creates it if it doesn't exist, deletes and re-creates it if it exists with improper spec. -func (r *ConfigMapReconciler) ensureWICDRoleBinding(ctx context.Context) error { - existingRB, err := r.k8sclientset.RbacV1().RoleBindings(r.watchNamespace).Get(ctx, wicdRBACResourceName, - meta.GetOptions{}) - if err != nil && !k8sapierrors.IsNotFound(err) { - return fmt.Errorf("unable to get RoleBinding %s/%s: %w", r.watchNamespace, wicdRBACResourceName, err) - } - - expectedRB := &rbac.RoleBinding{ - ObjectMeta: meta.ObjectMeta{ - Name: wicdRBACResourceName, - }, - RoleRef: rbac.RoleRef{ - APIGroup: rbac.GroupName, - Kind: "Role", - Name: wicdRBACResourceName, - }, - Subjects: []rbac.Subject{{ - Kind: rbac.ServiceAccountKind, - Name: wicdRBACResourceName, - Namespace: r.watchNamespace, - }}, - } - if err == nil { - // check if existing RoleBinding's contents are as expected, delete it if not - if existingRB.RoleRef.Name == expectedRB.RoleRef.Name && - reflect.DeepEqual(existingRB.Subjects, expectedRB.Subjects) { - return nil - } - err = r.k8sclientset.RbacV1().RoleBindings(r.watchNamespace).Delete(ctx, wicdRBACResourceName, - meta.DeleteOptions{}) - if err != nil { - return fmt.Errorf("unable to delete RoleBinding %s/%s: %w", r.watchNamespace, wicdRBACResourceName, err) - } - r.log.Info("Deleted malformed resource", 
"RoleBinding", - kubeTypes.NamespacedName{Namespace: existingRB.Namespace, Name: existingRB.Name}, - "RoleRef", existingRB.RoleRef.Name, "Subjects", existingRB.Subjects) - } - // create proper resource if it does not exist - createdRB, err := r.k8sclientset.RbacV1().RoleBindings(r.watchNamespace).Create(ctx, expectedRB, - meta.CreateOptions{}) - if err != nil { - return fmt.Errorf("unable to create RoleBinding %s/%s: %w", r.watchNamespace, wicdRBACResourceName, err) - } - r.log.Info("Created resource", "RoleBinding", - kubeTypes.NamespacedName{Namespace: createdRB.Namespace, Name: createdRB.Name}, - "RoleRef", createdRB.RoleRef.Name, "Subjects", createdRB.Subjects) - - return nil -} - -// ensureWICDClusterRoleBinding ensures the WICD ClusterRoleBinding resource exists as expected. -// Creates it if it doesn't exist, deletes and re-creates it if it exists with improper spec. -func (r *ConfigMapReconciler) ensureWICDClusterRoleBinding(ctx context.Context) error { - existingCRB, err := r.k8sclientset.RbacV1().ClusterRoleBindings().Get(ctx, wicdRBACResourceName, - meta.GetOptions{}) - if err != nil && !k8sapierrors.IsNotFound(err) { - return err - } - - expectedCRB := &rbac.ClusterRoleBinding{ - ObjectMeta: meta.ObjectMeta{ - Name: wicdRBACResourceName, - }, - RoleRef: rbac.RoleRef{ - APIGroup: rbac.GroupName, - Kind: "ClusterRole", - Name: wicdRBACResourceName, - }, - Subjects: []rbac.Subject{{ - Kind: rbac.ServiceAccountKind, - Name: wicdRBACResourceName, - Namespace: r.watchNamespace, - }}, - } - if err == nil { - // check if existing ClusterRoleBinding's contents are as expected, delete it if not - if existingCRB.RoleRef.Name == expectedCRB.RoleRef.Name && - reflect.DeepEqual(existingCRB.Subjects, expectedCRB.Subjects) { - return nil - } - err = r.k8sclientset.RbacV1().ClusterRoleBindings().Delete(ctx, wicdRBACResourceName, - meta.DeleteOptions{}) - if err != nil { - return err - } - r.log.Info("Deleted malformed resource", "ClusterRoleBinding", existingCRB.Name, - 
"RoleRef", existingCRB.RoleRef.Name, "Subjects", existingCRB.Subjects) - - r.log.Info("Process will restart to reconcile resources") - defer os.Exit(1) - } - // create proper resource if it does not exist - createdCRB, err := r.k8sclientset.RbacV1().ClusterRoleBindings().Create(ctx, expectedCRB, meta.CreateOptions{}) - if err == nil { - r.log.Info("Created resource", "ClusterRoleBinding", createdCRB.Name, - "RoleRef", createdCRB.RoleRef.Name, "Subjects", createdCRB.Subjects) - } - return err -} - // generateServicesManifest generates and regenerates the services manifest. // this gets called when the configmap reconciler is first created, to create the services manifest, // and also when the rendered-worker configmap is changed, to regenerate it. diff --git a/controllers/wicd_csr_controller.go b/controllers/wicd_csr_controller.go new file mode 100644 index 0000000000..33c8103ba7 --- /dev/null +++ b/controllers/wicd_csr_controller.go @@ -0,0 +1,199 @@ +/* +Copyright 2021. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controllers + +import ( + "context" + "fmt" + "strings" + + "github.com/go-logr/logr" + certificatesv1 "k8s.io/api/certificates/v1" + corev1 "k8s.io/api/core/v1" + k8sapierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/record" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/openshift/windows-machine-config-operator/pkg/condition" + csrvalidation "github.com/openshift/windows-machine-config-operator/pkg/csr/validation" + "github.com/openshift/windows-machine-config-operator/pkg/rbac" + "github.com/openshift/windows-machine-config-operator/pkg/windows" +) + +//+kubebuilder:rbac:groups="certificates.k8s.io",resources=certificatesigningrequests/approval,verbs=update +//+kubebuilder:rbac:groups="certificates.k8s.io",resources=certificatesigningrequests,verbs=get;list;watch +//+kubebuilder:rbac:groups="certificates.k8s.io",resources=signers,verbs=approve,resourceNames=kubernetes.io/kube-apiserver-client +//+kubebuilder:rbac:groups=certificates.k8s.io,resources=certificatesigningrequests/status,verbs=get;update;patch + +const ( + // WICDCSRController is the name of this controller in logs + WICDCSRController = "wicd-csr" +) + +// wicdCSRReconciler handles certificate signing requests for WICD +type wicdCSRReconciler struct { + client client.Client + k8sclientset *kubernetes.Clientset + log logr.Logger + watchNamespace string + recorder record.EventRecorder + validator *csrvalidation.CSRValidator +} + +// NewWICDCSRController creates a new WICD CSR controller following the existing pattern +func NewWICDCSRController(mgr manager.Manager, 
watchNamespace string) (*wicdCSRReconciler, error) { + clientset, err := kubernetes.NewForConfig(mgr.GetConfig()) + if err != nil { + return nil, fmt.Errorf("error creating kubernetes clientset: %w", err) + } + + validator := csrvalidation.NewCSRValidator(mgr.GetClient(), csrvalidation.WICDCertType) + + return &wicdCSRReconciler{ + client: mgr.GetClient(), + log: ctrl.Log.WithName("controllers").WithName(WICDCSRController), + k8sclientset: clientset, + watchNamespace: watchNamespace, + recorder: mgr.GetEventRecorderFor(WICDCSRController), + validator: validator, + }, nil +} + +// isWICDCSR checks if this CSR is from a WICD identity (service account or certificate-based) +func (r *wicdCSRReconciler) isWICDCSR(obj runtime.Object) bool { + csr, ok := obj.(*certificatesv1.CertificateSigningRequest) + if !ok { + return false + } + return r.isWICDServiceAccount(csr) || r.isWICDCertificateIdentity(csr) +} + +// isPendingCSR checks if this CSR is pending (neither approved nor denied) +func isPendingCSR(obj runtime.Object) bool { + csr, ok := obj.(*certificatesv1.CertificateSigningRequest) + if !ok { + return false + } + for _, condition := range csr.Status.Conditions { + if condition.Type == certificatesv1.CertificateApproved || condition.Type == certificatesv1.CertificateDenied { + return false + } + } + return true +} + +// SetupWithManager sets up the controller with the Manager +func (r *wicdCSRReconciler) SetupWithManager(mgr ctrl.Manager) error { + wicdCSRPredicate := predicate.Funcs{ + CreateFunc: func(e event.CreateEvent) bool { + return r.isWICDCSR(e.Object) && isPendingCSR(e.Object) + }, + UpdateFunc: func(e event.UpdateEvent) bool { + return r.isWICDCSR(e.ObjectNew) && isPendingCSR(e.ObjectNew) + }, + GenericFunc: func(e event.GenericEvent) bool { + return r.isWICDCSR(e.Object) && isPendingCSR(e.Object) + }, + DeleteFunc: func(e event.DeleteEvent) bool { + // We don't need to handle delete events for CSRs + return false + }, + } + return 
builder.ControllerManagedBy(mgr). + For(&certificatesv1.CertificateSigningRequest{}, builder.WithPredicates(wicdCSRPredicate)). + Named(WICDCSRController). + Complete(r) +} + +// Reconcile processes WICD CertificateSigningRequests +func (r *wicdCSRReconciler) Reconcile(ctx context.Context, req reconcile.Request) (result reconcile.Result, err error) { + // Prevent WMCO upgrades while CSRs are being processed + if err := condition.MarkAsBusy(ctx, r.client, r.watchNamespace, r.recorder, NodeController); err != nil { + return ctrl.Result{}, err + } + defer func() { + err = markAsFreeOnSuccess(ctx, r.client, r.watchNamespace, r.recorder, NodeController, result.Requeue, err) + }() + r.log.V(1).Info("reconciling", "name", req.NamespacedName.String()) + + csr := &certificatesv1.CertificateSigningRequest{} + if err := r.client.Get(ctx, req.NamespacedName, csr); err != nil { + if k8sapierrors.IsNotFound(err) { + return reconcile.Result{}, nil + } + return reconcile.Result{}, fmt.Errorf("failed to get CertificateSigningRequest: %w", err) + } + + // If a CSR is approved/denied after being added to the queue, but before we reconcile it, + // trying to approve it again will result in an error and cause a loop. + // Return early if the CSR has been approved/denied externally. 
+ if !isPendingCSR(csr) { + r.log.Info("CSR is already approved/denied", "Name", csr.Name) + return reconcile.Result{}, nil + } + + // Validate signer name for WICD certificates + if csr.Spec.SignerName != certificatesv1.KubeAPIServerClientSignerName { + r.log.Info("Ignoring CSR with unexpected signerName", "name", csr.Name, "signer", csr.Spec.SignerName) + return reconcile.Result{}, nil + } + + // Validate the CSR content + if err := r.validator.ValidateCSR(ctx, csr); err != nil { + r.log.Error(err, "WICD CSR validation failed, ignoring CSR") + return reconcile.Result{}, nil + } + + return r.approveCSR(ctx, csr) +} + +// isWICDServiceAccount checks if this CSR is from the WICD service account +func (r *wicdCSRReconciler) isWICDServiceAccount(csr *certificatesv1.CertificateSigningRequest) bool { + expectedUsername := fmt.Sprintf("system:serviceaccount:%s:%s", r.watchNamespace, windows.WicdServiceName) + return csr.Spec.Username == expectedUsername +} + +// isWICDCertificateIdentity checks if this CSR is from a WICD certificate-based identity +func (r *wicdCSRReconciler) isWICDCertificateIdentity(csr *certificatesv1.CertificateSigningRequest) bool { + return strings.HasPrefix(csr.Spec.Username, rbac.WICDUserPrefix) +} + +// approveCSR approves the certificate signing request using the proper UpdateApproval API +func (r *wicdCSRReconciler) approveCSR(ctx context.Context, csr *certificatesv1.CertificateSigningRequest) (reconcile.Result, error) { + csr.Status.Conditions = append(csr.Status.Conditions, certificatesv1.CertificateSigningRequestCondition{ + Type: certificatesv1.CertificateApproved, + Status: corev1.ConditionTrue, + Reason: "WICDAutoApproved", + Message: "This CSR was approved by the WICD certificate controller", + LastUpdateTime: metav1.Now(), + }) + + if _, err := r.k8sclientset.CertificatesV1().CertificateSigningRequests().UpdateApproval(ctx, csr.Name, csr, metav1.UpdateOptions{}); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to approve 
CSR: %w", err) + } + r.log.Info("CSR approved", "CSR", csr.Name) + return reconcile.Result{}, nil +} diff --git a/hack/run-ci-e2e-test.sh b/hack/run-ci-e2e-test.sh index fcb4344a66..0c74fd2688 100755 --- a/hack/run-ci-e2e-test.sh +++ b/hack/run-ci-e2e-test.sh @@ -200,6 +200,8 @@ if [[ "$TEST" = "basic" ]]; then go test ./test/e2e/... -run=TestWMCO/network -v -timeout=20m -args $GO_TEST_ARGS printf "\n####### Testing storage #######\n" >> "$ARTIFACT_DIR"/wmco.log go test ./test/e2e/... -run=TestWMCO/storage -v -timeout=10m -args $GO_TEST_ARGS + printf "\n####### Testing wicd rbac #######\n" >> "$ARTIFACT_DIR"/wmco.log + go test ./test/e2e/... -run=TestWMCO/wicd_rbac -v -timeout=20m -args $GO_TEST_ARGS printf "\n####### Testing service reconciliation #######\n" >> "$ARTIFACT_DIR"/wmco.log go test ./test/e2e/... -run=TestWMCO/service_reconciliation -v -timeout=20m -args $GO_TEST_ARGS printf "\n####### Testing cluster-wide proxy #######\n" >> "$ARTIFACT_DIR"/wmco.log diff --git a/pkg/csr/csr.go b/pkg/csr/csr.go index 1e8b0ff7ab..4bc77837a1 100644 --- a/pkg/csr/csr.go +++ b/pkg/csr/csr.go @@ -10,7 +10,6 @@ package csr import ( "context" "crypto/x509" - "encoding/pem" "fmt" "net" "reflect" @@ -23,12 +22,12 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" meta "k8s.io/apimachinery/pkg/apis/meta/v1" kubeTypes "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/validation" "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" + csrvalidation "github.com/openshift/windows-machine-config-operator/pkg/csr/validation" "github.com/openshift/windows-machine-config-operator/pkg/instance" "github.com/openshift/windows-machine-config-operator/pkg/secrets" "github.com/openshift/windows-machine-config-operator/pkg/signer" @@ -40,38 +39,6 @@ import ( //+kubebuilder:rbac:groups="certificates.k8s.io",resources=certificatesigningrequests,verbs=get;list;watch 
//+kubebuilder:rbac:groups="certificates.k8s.io",resources=signers,verbs=approve,resourceNames=kubernetes.io/kube-apiserver-client-kubelet;kubernetes.io/kubelet-serving -const ( - nodeGroup = "system:nodes" - nodeUserName = "system:node" - NodeUserNamePrefix = nodeUserName + ":" - systemPrefix = "system:authenticated" -) - -var ( - // kubeletClientUsages contains the permitted key usages from a kube-apiserver-client-kubelet signer - kubeletClientUsages = []certificates.KeyUsage{ - certificates.UsageKeyEncipherment, - certificates.UsageDigitalSignature, - certificates.UsageClientAuth, - } - // kubeletClientUsagesNoRSA contains the permitted client usages when kubelet is given a non-RSA key - kubeletClientUsagesNoRSA = []certificates.KeyUsage{ - certificates.UsageDigitalSignature, - certificates.UsageClientAuth, - } - // kubeletServerUsages contains the permitted key usages from a kubelet-serving signer - kubeletServerUsages = []certificates.KeyUsage{ - certificates.UsageKeyEncipherment, - certificates.UsageDigitalSignature, - certificates.UsageServerAuth, - } - // kubeletServerUsagesNoRSA contains the permitted server usages when kubelet is given a non-RSA key - kubeletServerUsagesNoRSA = []certificates.KeyUsage{ - certificates.UsageDigitalSignature, - certificates.UsageServerAuth, - } -) - // Approver holds the information required to approve a node CSR type Approver struct { // client is the cache client @@ -84,6 +51,10 @@ type Approver struct { recorder record.EventRecorder // namespace is the namespace in which CSR's are present namespace string + // clientValidator validates kubelet client certificates + clientValidator *csrvalidation.CSRValidator + // servingValidator validates kubelet serving certificates + servingValidator *csrvalidation.CSRValidator } // NewApprover returns a pointer to the Approver @@ -92,12 +63,21 @@ func NewApprover(client client.Client, clientSet *kubernetes.Clientset, csr *cer if client == nil || csr == nil || clientSet == nil { return 
nil, fmt.Errorf("kubernetes client, clientSet or CSR should not be nil") } - return &Approver{client, - clientSet, - csr, - log, - recorder, - watchNamespace}, nil + + // Create validators for kubelet types + clientValidator := csrvalidation.NewCSRValidator(client, csrvalidation.KubeletClientCertType) + servingValidator := csrvalidation.NewCSRValidator(client, csrvalidation.KubeletServingCertType) + + return &Approver{ + client: client, + k8sclientset: clientSet, + csr: csr, + log: log, + recorder: recorder, + namespace: watchNamespace, + clientValidator: clientValidator, + servingValidator: servingValidator, + }, nil } // Approve determines if a CSR should be approved by WMCO, and if so, approves it by updating its status. This function @@ -136,15 +116,62 @@ func (a *Approver) Approve(ctx context.Context) error { // If the CSR is not from a BYOH Windows instance, it returns false with no error. // If there is an error during validation, it returns false with the error. func (a *Approver) validateCSRContents(ctx context.Context) (bool, error) { - parsedCSR, err := ParseCSR(a.csr.Spec.Request) + parsedCSR, err := csrvalidation.ParseCSR(a.csr.Spec.Request) if err != nil { - return false, fmt.Errorf("error parsing CSR: %s: %w", a.csr.Name, err) + return false, fmt.Errorf("error parsing CSR %s: %w", a.csr.Name, err) } - nodeName := strings.TrimPrefix(parsedCSR.Subject.CommonName, NodeUserNamePrefix) - if nodeName == "" { - return false, fmt.Errorf("CSR %s subject name does not contain the required node user prefix: %s", - a.csr.Name, NodeUserNamePrefix) + if !strings.HasPrefix(parsedCSR.Subject.CommonName, csrvalidation.NodeUserNamePrefix) { + return false, nil + } + + if a.isNodeClientCert(parsedCSR) { + return a.validateKubeletClientCSR(ctx) + } + return a.validateKubeletServingCSR(ctx) +} + +// validateKubeletClientCSR validates kubelet client certificates +func (a *Approver) validateKubeletClientCSR(ctx context.Context) (bool, error) { + nodeName, err := 
a.clientValidator.GetNodeNameFromCSR(a.csr) + if err != nil { + return false, fmt.Errorf("error extracting node name from CSR %s: %w", a.csr.Name, err) + } + + valid, err := a.validateNodeName(ctx, nodeName) + if err != nil { + return false, fmt.Errorf("error validating node name %s for CSR: %s: %w", nodeName, a.csr.Name, err) + } + // CSR is not from a BYOH Windows instance, don't return error to avoid requeue, instead log if it is invalid + // as it might be from a linux node. + if !valid { + a.log.Info("CSR contents are invalid for approval by WMCO", "CSR", a.csr.Name) + return false, nil + } + + // Node client bootstrapper CSR is received before the instance becomes a node + // hence we should not proceed if a corresponding node already exists + node := &core.Node{} + err = a.client.Get(ctx, kubeTypes.NamespacedName{Namespace: a.namespace, Name: nodeName}, node) + if err != nil && !apierrors.IsNotFound(err) { + return false, fmt.Errorf("unable to get node %s: %w", nodeName, err) + } else if err == nil { + a.log.Info("node already exists, cannot validate CSR", "node", nodeName, "CSR", a.csr.Name) + return false, nil + } + + if err := a.clientValidator.ValidateCSR(ctx, a.csr); err != nil { + return false, fmt.Errorf("kubelet client CSR validation failed: %w", err) + } + + return true, nil +} + +// validateKubeletServingCSR validates kubelet serving certificates +func (a *Approver) validateKubeletServingCSR(ctx context.Context) (bool, error) { + nodeName, err := a.servingValidator.GetNodeNameFromCSR(a.csr) + if err != nil { + return false, fmt.Errorf("error extracting node name from CSR %s: %w", a.csr.Name, err) } // lookup the node name against the instance configMap addresses/host names @@ -158,25 +185,9 @@ func (a *Approver) validateCSRContents(ctx context.Context) (bool, error) { a.log.Info("CSR contents are invalid for approval by WMCO", "CSR", a.csr.Name) return false, nil } - // Kubelet on a node needs two certificates for its normal operation: - // Client 
certificate for securely communicating with the Kubernetes API server - // Server certificate for use by Kubernetes API server to talk back to kubelet - // Both types are validated based on their contents - if a.isNodeClientCert(parsedCSR) { - // Node client bootstrapper CSR is received before the instance becomes a node - // hence we should not proceed if a corresponding node already exists - node := &core.Node{} - err := a.client.Get(ctx, kubeTypes.NamespacedName{Namespace: a.namespace, - Name: nodeName}, node) - if err != nil && !apierrors.IsNotFound(err) { - return false, fmt.Errorf("unable to get node %s: %w", nodeName, err) - } else if err == nil { - return false, fmt.Errorf("%s node already exists, cannot validate CSR: %s", nodeName, a.csr.Name) - } - } else { - if err := a.validateKubeletServingCSR(parsedCSR); err != nil { - return false, fmt.Errorf("unable to validate kubelet serving CSR: %s: %w", a.csr.Name, err) - } + + if err := a.servingValidator.ValidateCSR(ctx, a.csr); err != nil { + return false, fmt.Errorf("kubelet serving CSR validation failed: %w", err) } return true, nil } @@ -231,73 +242,6 @@ func (a *Approver) validateWithHostName(ctx context.Context, nodeName string, wi return true, nil } -// validateKubeletServingCSR validates a kubelet serving CSR for its contents -func (a *Approver) validateKubeletServingCSR(parsedCsr *x509.CertificateRequest) error { - if a.csr == nil || parsedCsr == nil { - return fmt.Errorf("CSR or request should not be nil") - } - // Check groups, we need at least: system:nodes, system:authenticated - if len(a.csr.Spec.Groups) < 2 { - return fmt.Errorf("CSR %s contains invalid number of groups: %d", a.csr.Name, - len(a.csr.Spec.Groups)) - } - groups := sets.NewString(a.csr.Spec.Groups...) 
- if !groups.HasAll(nodeGroup, systemPrefix) { - return fmt.Errorf("CSR %s does not contain required groups", a.csr.Name) - } - - // Check usages, the list can include: digital signature, key encipherment and server auth - if !hasUsages(a.csr, kubeletServerUsages) && !hasUsages(a.csr, kubeletServerUsagesNoRSA) { - return fmt.Errorf("CSR %s does not contain required usages", a.csr.Name) - } - - var hasOrg bool - for i := range parsedCsr.Subject.Organization { - if parsedCsr.Subject.Organization[i] == nodeGroup { - hasOrg = true - break - } - } - if !hasOrg { - return fmt.Errorf("CSR %s does not contain required subject organization", a.csr.Name) - } - return nil -} - -// isNodeClientCert returns true if the CSR is from a kube-apiserver-client-kubelet signer -// reference: https://kubernetes.io/docs/reference/access-authn-authz/certificate-signing-requests/#kubernetes-signers -func (a *Approver) isNodeClientCert(x509cr *x509.CertificateRequest) bool { - if !reflect.DeepEqual([]string{nodeGroup}, x509cr.Subject.Organization) { - return false - } - if (len(x509cr.DNSNames) > 0) || (len(x509cr.EmailAddresses) > 0) || (len(x509cr.IPAddresses) > 0) { - return false - } - // Check usages, the list can include: digital signature, key encipherment and client auth - if !hasUsages(a.csr, kubeletClientUsages) && !hasUsages(a.csr, kubeletClientUsagesNoRSA) { - return false - } - return true -} - -// hasUsages verifies if the required usages exist in the CSR spec -func hasUsages(csr *certificates.CertificateSigningRequest, usages []certificates.KeyUsage) bool { - if csr == nil || len(csr.Spec.Usages) < 2 { - return false - } - usageMap := map[certificates.KeyUsage]struct{}{} - for _, u := range usages { - usageMap[u] = struct{}{} - } - - for _, u := range csr.Spec.Usages { - if _, ok := usageMap[u]; !ok { - return false - } - } - return true -} - // matchesHostname returns true if given node name matches with host name of any of the instances present // in the given instance list 
func matchesHostname(nodeName string, windowsInstances []*instance.Info, @@ -352,15 +296,15 @@ func matchesDNS(nodeName string, windowsInstances []*instance.Info) (bool, error return false, nil } -// ParseCSR extracts the CSR from the API object and decodes it. -func ParseCSR(csr []byte) (*x509.CertificateRequest, error) { - if len(csr) == 0 { - return nil, fmt.Errorf("CSR request spec should not be empty") +// isNodeClientCert returns true if the CSR is from a kube-apiserver-client-kubelet signer. +// Client certificates have no DNS names, email addresses, or IP addresses (no SAN). +// Reference: https://kubernetes.io/docs/reference/access-authn-authz/certificate-signing-requests/#kubernetes-signers +func (a *Approver) isNodeClientCert(x509cr *x509.CertificateRequest) bool { + if !reflect.DeepEqual([]string{csrvalidation.NodeGroup}, x509cr.Subject.Organization) { + return false } - // extract PEM from request object - block, _ := pem.Decode(csr) - if block == nil || block.Type != "CERTIFICATE REQUEST" { - return nil, fmt.Errorf("PEM block type must be CERTIFICATE REQUEST") + if (len(x509cr.DNSNames) > 0) || (len(x509cr.EmailAddresses) > 0) || (len(x509cr.IPAddresses) > 0) { + return false } - return x509.ParseCertificateRequest(block.Bytes) + return true } diff --git a/pkg/csr/csr_test.go b/pkg/csr/csr_test.go index 83e91973e1..74a2ee8b0a 100644 --- a/pkg/csr/csr_test.go +++ b/pkg/csr/csr_test.go @@ -6,6 +6,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/openshift/windows-machine-config-operator/pkg/csr/validation" "github.com/openshift/windows-machine-config-operator/pkg/instance" ) @@ -104,7 +105,7 @@ func TestParseCSR(t *testing.T) { } for _, test := range testCases { t.Run(test.name, func(t *testing.T) { - out, err := ParseCSR(test.input) + out, err := validation.ParseCSR(test.input) if test.expectedErr { assert.Error(t, err) assert.Nil(t, out) diff --git a/pkg/csr/validation/types.go 
b/pkg/csr/validation/types.go new file mode 100644 index 0000000000..1b5ee7511c --- /dev/null +++ b/pkg/csr/validation/types.go @@ -0,0 +1,48 @@ +package validation + +import ( + "github.com/openshift/windows-machine-config-operator/pkg/rbac" +) + +const ( + // Node certificate constants + NodeGroup = "system:nodes" + nodeUserName = "system:node" + systemPrefix = "system:authenticated" + NodeUserNamePrefix = nodeUserName + ":" +) + +var ( + // KubeletClientCertType defines validation rules for kubelet client certificates + KubeletClientCertType = CertificateType{ + Name: "kubelet-client", + UserPrefix: nodeUserName, + GroupName: NodeGroup, + RequiredGroups: []string{}, // Client certs don't require specific groups in CSR spec + ValidateNodeExists: false, // Node doesn't exist yet during bootstrapping + AllowDNSNames: false, + AllowIPAddresses: false, + } + + // KubeletServingCertType defines validation rules for kubelet serving certificates + KubeletServingCertType = CertificateType{ + Name: "kubelet-serving", + UserPrefix: nodeUserName, + GroupName: NodeGroup, + RequiredGroups: []string{NodeGroup, systemPrefix}, // Serving certs require specific groups + ValidateNodeExists: true, // Node must exist for serving certs + AllowDNSNames: true, // Serving certs can have DNS names + AllowIPAddresses: true, // Serving certs can have IP addresses + } + + // WICDCertType defines validation rules for WICD certificates + WICDCertType = CertificateType{ + Name: "wicd", + UserPrefix: rbac.WICDUserPrefix, + GroupName: rbac.WICDGroupName, + RequiredGroups: []string{}, // WICD CSRs come from service account, groups are set by K8s + ValidateNodeExists: true, // Node must exist for WICD certs + AllowDNSNames: false, // WICD certs don't need DNS names + AllowIPAddresses: false, // WICD certs don't need IP addresses + } +) diff --git a/pkg/csr/validation/validation.go b/pkg/csr/validation/validation.go new file mode 100644 index 0000000000..f293a8436f --- /dev/null +++ 
b/pkg/csr/validation/validation.go @@ -0,0 +1,251 @@ +package validation + +import ( + "context" + "crypto/x509" + "encoding/pem" + "fmt" + "strings" + + certificatesv1 "k8s.io/api/certificates/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +var ( + // kubeletClientUsages contains the permitted key usages from a kube-apiserver-client-kubelet signer + kubeletClientUsages = []certificatesv1.KeyUsage{ + certificatesv1.UsageKeyEncipherment, + certificatesv1.UsageDigitalSignature, + certificatesv1.UsageClientAuth, + } + // kubeletClientUsagesNoRSA contains the permitted client usages when kubelet is given a non-RSA key + kubeletClientUsagesNoRSA = []certificatesv1.KeyUsage{ + certificatesv1.UsageDigitalSignature, + certificatesv1.UsageClientAuth, + } + // kubeletServerUsages contains the permitted key usages from a kubelet-serving signer + kubeletServerUsages = []certificatesv1.KeyUsage{ + certificatesv1.UsageKeyEncipherment, + certificatesv1.UsageDigitalSignature, + certificatesv1.UsageServerAuth, + } + // kubeletServerUsagesNoRSA contains the permitted server usages when kubelet is given a non-RSA key + kubeletServerUsagesNoRSA = []certificatesv1.KeyUsage{ + certificatesv1.UsageDigitalSignature, + certificatesv1.UsageServerAuth, + } + // wicdClientUsages contains the required usages for WICD client certificates + wicdClientUsages = []certificatesv1.KeyUsage{ + certificatesv1.UsageDigitalSignature, + certificatesv1.UsageClientAuth, + certificatesv1.UsageKeyEncipherment, + } +) + +// CertificateType defines the type of certificate being validated +type CertificateType struct { + // Name is used for logging and error messages + Name string + // UserPrefix is the expected prefix for the certificate common name (e.g., "system:node", "system:wicd-node") + UserPrefix string + // GroupName is the expected organization 
in the certificate (e.g., "system:nodes", "system:wicd-nodes") + GroupName string + // RequiredGroups are the required groups in the CSR spec (for kubelet serving certificates) + RequiredGroups []string + // ValidateNodeExists indicates whether to verify the node exists before approval + ValidateNodeExists bool + // AllowDNSNames indicates whether DNS names are allowed in the certificate + AllowDNSNames bool + // AllowIPAddresses indicates whether IP addresses are allowed in the certificate + AllowIPAddresses bool +} + +// CSRValidator provides common CSR validation functionality +type CSRValidator struct { + client client.Client + certType CertificateType +} + +// NewCSRValidator creates a new CSR validator for the given certificate type +func NewCSRValidator(client client.Client, certType CertificateType) *CSRValidator { + return &CSRValidator{ + client: client, + certType: certType, + } +} + +// ParseCSR extracts the CSR from the API object and decodes it +func ParseCSR(csrData []byte) (*x509.CertificateRequest, error) { + if len(csrData) == 0 { + return nil, fmt.Errorf("CSR request spec should not be empty") + } + // extract PEM from request object + block, _ := pem.Decode(csrData) + if block == nil || block.Type != "CERTIFICATE REQUEST" { + return nil, fmt.Errorf("PEM block type must be CERTIFICATE REQUEST") + } + return x509.ParseCertificateRequest(block.Bytes) +} + +// ValidateCSR validates a CSR according to the certificate type rules +func (v *CSRValidator) ValidateCSR(ctx context.Context, csr *certificatesv1.CertificateSigningRequest) error { + // Parse the certificate request + parsedCSR, err := ParseCSR(csr.Spec.Request) + if err != nil { + return fmt.Errorf("error parsing CSR %s: %w", csr.Name, err) + } + + nodeName := strings.TrimPrefix(parsedCSR.Subject.CommonName, v.certType.UserPrefix+":") + if nodeName == "" || nodeName == parsedCSR.Subject.CommonName { + return fmt.Errorf("CSR %s subject name does not contain the required prefix: %s", + csr.Name, 
v.certType.UserPrefix+":") + } + + if err := v.validateCertificateContent(parsedCSR); err != nil { + return fmt.Errorf("certificate content validation failed for CSR %s: %w", csr.Name, err) + } + + // Validate CSR spec (groups, usages) + if err := v.validateCSRSpec(csr, parsedCSR); err != nil { + return fmt.Errorf("CSR spec validation failed for CSR %s: %w", csr.Name, err) + } + + // Validate node exists if required for this type of CSR + if v.certType.ValidateNodeExists { + if err := v.validateNodeExists(ctx, nodeName); err != nil { + return fmt.Errorf("node validation failed for CSR %s: %w", csr.Name, err) + } + } + return nil +} + +// validateCertificateContent validates the parsed certificate request content +func (v *CSRValidator) validateCertificateContent(parsedCSR *x509.CertificateRequest) error { + // Validate organization + hasRequiredOrg := false + for _, org := range parsedCSR.Subject.Organization { + if org == v.certType.GroupName { + hasRequiredOrg = true + break + } + } + if !hasRequiredOrg { + return fmt.Errorf("certificate request missing required organization: %s", v.certType.GroupName) + } + + // Validate DNS names and IP addresses based on certificate type + if !v.certType.AllowDNSNames && len(parsedCSR.DNSNames) > 0 { + return fmt.Errorf("DNS names not allowed for %s certificates", v.certType.Name) + } + if !v.certType.AllowIPAddresses && len(parsedCSR.IPAddresses) > 0 { + return fmt.Errorf("IP addresses not allowed for %s certificates", v.certType.Name) + } + return nil +} + +// validateCSRSpec validates the CSR spec (groups, usages) +func (v *CSRValidator) validateCSRSpec(csr *certificatesv1.CertificateSigningRequest, parsedCSR *x509.CertificateRequest) error { + // Validate required groups if specified + if len(v.certType.RequiredGroups) > 0 { + if len(csr.Spec.Groups) < len(v.certType.RequiredGroups) { + return fmt.Errorf("CSR contains invalid number of groups: %d, expected at least %d", + len(csr.Spec.Groups), 
len(v.certType.RequiredGroups)) + } + groups := sets.NewString(csr.Spec.Groups...) + if !groups.HasAll(v.certType.RequiredGroups...) { + return fmt.Errorf("CSR does not contain required groups: %v", v.certType.RequiredGroups) + } + } + + // Validate key usages + if !v.hasValidUsages(csr) { + return fmt.Errorf("CSR does not contain valid usages for %s certificates", v.certType.Name) + } + + return nil +} + +// hasValidUsages verifies if the CSR has valid key usages for this certificate type +func (v *CSRValidator) hasValidUsages(csr *certificatesv1.CertificateSigningRequest) bool { + switch v.certType.Name { + case "kubelet-client": + return hasUsages(csr, kubeletClientUsages) || hasUsages(csr, kubeletClientUsagesNoRSA) + case "kubelet-serving": + return hasUsages(csr, kubeletServerUsages) || hasUsages(csr, kubeletServerUsagesNoRSA) + case "wicd": + return hasUsages(csr, wicdClientUsages) + default: + return false + } +} + +// validateNodeExists checks if the node exists in the cluster +func (v *CSRValidator) validateNodeExists(ctx context.Context, nodeName string) error { + node := &corev1.Node{} + if err := v.client.Get(ctx, types.NamespacedName{Name: nodeName}, node); err != nil { + if apierrors.IsNotFound(err) { + return fmt.Errorf("node %s does not exist", nodeName) + } + return fmt.Errorf("failed to get node %s: %w", nodeName, err) + } + return nil +} + +// GetNodeNameFromCSR extracts the node name from a CSR's common name +func (v *CSRValidator) GetNodeNameFromCSR(csr *certificatesv1.CertificateSigningRequest) (string, error) { + parsedCSR, err := ParseCSR(csr.Spec.Request) + if err != nil { + return "", fmt.Errorf("error parsing CSR: %w", err) + } + + nodeName := strings.TrimPrefix(parsedCSR.Subject.CommonName, v.certType.UserPrefix+":") + if nodeName == "" || nodeName == parsedCSR.Subject.CommonName { + return "", fmt.Errorf("invalid common name format: %s", parsedCSR.Subject.CommonName) + } + + return nodeName, nil +} + +// IsCorrectCertificateType checks 
if a CSR is for this certificate type +func (v *CSRValidator) IsCorrectCertificateType(csr *certificatesv1.CertificateSigningRequest) bool { + parsedCSR, err := ParseCSR(csr.Spec.Request) + if err != nil { + return false + } + + // Check if Common Name starts with the expected prefix + if !strings.HasPrefix(parsedCSR.Subject.CommonName, v.certType.UserPrefix+":") { + return false + } + + // Check if Organization includes the expected group + for _, org := range parsedCSR.Subject.Organization { + if org == v.certType.GroupName { + return true + } + } + + return false +} + +// hasUsages verifies if the required usages exist in the CSR spec +func hasUsages(csr *certificatesv1.CertificateSigningRequest, usages []certificatesv1.KeyUsage) bool { + if csr == nil || len(csr.Spec.Usages) < 2 { + return false + } + usageMap := map[certificatesv1.KeyUsage]struct{}{} + for _, u := range usages { + usageMap[u] = struct{}{} + } + + for _, u := range csr.Spec.Usages { + if _, ok := usageMap[u]; !ok { + return false + } + } + return true +} diff --git a/pkg/daemon/certs/wicd_cert_manager.go b/pkg/daemon/certs/wicd_cert_manager.go new file mode 100644 index 0000000000..1fa5b459b0 --- /dev/null +++ b/pkg/daemon/certs/wicd_cert_manager.go @@ -0,0 +1,144 @@ +//go:build windows + +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package certs + +import ( + "crypto/tls" + "crypto/x509" + "crypto/x509/pkix" + "fmt" + "path/filepath" + "time" + + certificatesv1 "k8s.io/api/certificates/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" + "k8s.io/client-go/util/certificate" + + "github.com/openshift/windows-machine-config-operator/pkg/rbac" +) + +// WICDCertificateManager manages WICD node certificates +type WICDCertificateManager struct { + started bool + currentCertPath string + certificateConfig *rest.Config + certManager certificate.Manager +} + +// NewWICDCertificateManager creates a new WICD certificate manager +func NewWICDCertificateManager(nodeName, certDir, bootstrapKubeconfigPath, apiServer string, certDuration time.Duration) (*WICDCertificateManager, error) { + bootstrapConfig, err := clientcmd.BuildConfigFromFlags("", bootstrapKubeconfigPath) + if err != nil { + return nil, fmt.Errorf("failed to load bootstrap kubeconfig from %s: %w", bootstrapKubeconfigPath, err) + } + + currentCertPath := filepath.Join(certDir, rbac.WICDCertNamePrefix+"-current.pem") + currentKeyPath := filepath.Join(certDir, rbac.WICDCertNamePrefix+"-current.pem") + + certificateConfig := &rest.Config{ + Host: apiServer, + TLSClientConfig: rest.TLSClientConfig{ + CertFile: currentCertPath, + KeyFile: currentKeyPath, + CAFile: bootstrapConfig.TLSClientConfig.CAFile, + CAData: bootstrapConfig.TLSClientConfig.CAData, + }, + } + + newClientsetFn := func(current *tls.Certificate) (kubernetes.Interface, error) { + cfg := bootstrapConfig + if current != nil { + cfg = certificateConfig + } + return kubernetes.NewForConfig(cfg) + } + + // Create certificate store + certificateStore, err := certificate.NewFileStore(rbac.WICDCertNamePrefix, certDir, certDir, "", "") + if err != nil { + return nil, fmt.Errorf("failed to initialize certificate store: %w", err) + } + + wicdCertUsages := []certificatesv1.KeyUsage{ + certificatesv1.UsageDigitalSignature, + 
certificatesv1.UsageClientAuth, + certificatesv1.UsageKeyEncipherment, + } + + certCommonName := fmt.Sprintf("%s:%s", rbac.WICDUserPrefix, nodeName) + certManager, err := certificate.NewManager(&certificate.Config{ + ClientsetFn: newClientsetFn, + Template: &x509.CertificateRequest{ + Subject: pkix.Name{ + CommonName: certCommonName, + Organization: []string{rbac.WICDGroupName}, + }, + }, + RequestedCertificateLifetime: &certDuration, + SignerName: certificatesv1.KubeAPIServerClientSignerName, + Usages: wicdCertUsages, + CertificateStore: certificateStore, + }) + if err != nil { + return nil, fmt.Errorf("failed to initialize certificate manager: %w", err) + } + + wcm := &WICDCertificateManager{ + currentCertPath: currentCertPath, + certificateConfig: certificateConfig, + certManager: certManager, + } + + return wcm, nil +} + +// StartCertificateManagement starts certificate management for WICD. +// The certificate manager automatically handles renewal at 80% of certificate lifetime. +// NOTE: Kubernetes does not support certificate revocation for CSR-issued certificates. +// Once issued, certificates remain valid until expiration. For security, certificates should have +// short lifetimes and be rotated frequently. 
+func (wcm *WICDCertificateManager) StartCertificateManagement() error { + if wcm.started { + return nil + } + + wcm.certManager.Start() + wcm.started = true + return nil +} + +// Stop stops the certificate manager +func (wcm *WICDCertificateManager) Stop() { + if wcm.certManager != nil { + wcm.certManager.Stop() + } + wcm.started = false +} + +// GetCertificatePaths returns the paths to the current certificate files +func (wcm *WICDCertificateManager) GetCertificatePaths() (certFile string) { + return wcm.currentCertPath +} + +// GetCertificateConfig returns the certificate-based rest.Config +func (wcm *WICDCertificateManager) GetCertificateConfig() *rest.Config { + return wcm.certificateConfig +} diff --git a/pkg/daemon/controller/controller.go b/pkg/daemon/controller/controller.go index 36c1f566ec..6600d45235 100644 --- a/pkg/daemon/controller/controller.go +++ b/pkg/daemon/controller/controller.go @@ -23,6 +23,7 @@ import ( "errors" "fmt" "net" + "os" "reflect" "strings" "time" @@ -63,7 +64,7 @@ import ( ) // WICDController is the name of the WICD controller in logs and other outputs -const WICDController = "WICD" +const WICDController = "windows-instance-config-daemon-controller" // Options contains a list of options available when creating a new ServiceController type Options struct { @@ -134,28 +135,51 @@ func (sc *ServiceController) Bootstrap(desiredVersion string) error { } // RunController is the entry point of WICD's controller functionality -func RunController(ctx context.Context, watchNamespace, kubeconfig, caBundle string) error { +func RunController(ctx context.Context, watchNamespace, kubeconfig, caBundle string, certDir string, certDuration time.Duration) error { cfg, err := clientcmd.BuildConfigFromFlags("", kubeconfig) if err != nil { - return err + return fmt.Errorf("failed to load WICD kubeconfig: %w", err) } // This is a client that reads directly from the server, not a cached client. 
This is required to be used here, as // the cached client, created by ctrl.NewManager() will not be functional until the manager is started. directClient, err := NewDirectClient(cfg) if err != nil { - return err + return fmt.Errorf("failed to create bootstrap client: %w", err) } addrs, err := LocalInterfaceAddresses() if err != nil { - return err + return fmt.Errorf("failed to get local interface addresses: %w", err) } node, err := GetAssociatedNode(ctx, directClient, addrs) if err != nil { return fmt.Errorf("could not find node object associated with this instance: %w", err) } - ctrlMgr, err := ctrl.NewManager(cfg, ctrl.Options{ + certManager, err := certs.NewWICDCertificateManager(node.Name, certDir, kubeconfig, cfg.Host, certDuration) + if err != nil { + return fmt.Errorf("failed to create certificate manager: %w", err) + } + + if err := certManager.StartCertificateManagement(); err != nil { + return fmt.Errorf("certificate management failed: %w", err) + } + defer certManager.Stop() + + certFile := certManager.GetCertificatePaths() + err = wait.PollUntilContextTimeout(ctx, time.Second*2, time.Minute*5, true, func(ctx context.Context) (bool, error) { + if _, err := os.Stat(certFile); err != nil { + klog.Infof("Certificate file not yet available: %v", err) + return false, nil + } + return true, nil + }) + if err != nil { + return fmt.Errorf("timeout waiting for certificate files to be created: %w", err) + } + + certificateConfig := certManager.GetCertificateConfig() + ctrlMgr, err := ctrl.NewManager(certificateConfig, ctrl.Options{ Cache: cache.Options{ DefaultNamespaces: map[string]cache.Config{ watchNamespace: {}, diff --git a/pkg/nodeconfig/nodeconfig.go b/pkg/nodeconfig/nodeconfig.go index aff3826256..1f8c54b59d 100644 --- a/pkg/nodeconfig/nodeconfig.go +++ b/pkg/nodeconfig/nodeconfig.go @@ -35,6 +35,7 @@ import ( "github.com/openshift/windows-machine-config-operator/pkg/instance" "github.com/openshift/windows-machine-config-operator/pkg/metadata" 
"github.com/openshift/windows-machine-config-operator/pkg/nodeutil" + "github.com/openshift/windows-machine-config-operator/pkg/rbac" "github.com/openshift/windows-machine-config-operator/pkg/registries" "github.com/openshift/windows-machine-config-operator/pkg/retry" "github.com/openshift/windows-machine-config-operator/pkg/secrets" @@ -217,7 +218,7 @@ func (nc *NodeConfig) Configure(ctx context.Context) error { // Now that the node has been fully configured, update the node object in NodeConfig once more if err := nc.setNode(ctx, false); err != nil { - return fmt.Errorf("error getting node object: %w", err) + return fmt.Errorf("error setting node object: %w", err) } // Uncordon the node now that it is fully configured @@ -244,7 +245,7 @@ func (nc *NodeConfig) Configure(ctx context.Context) error { return err } -// safeReboot safely restarts the underlying instance, first cordoning and draining the associated node. +// SafeReboot safely restarts the underlying instance, first cordoning and draining the associated node. // Waits for reboot to take effect before uncordoning the node. 
func (nc *NodeConfig) SafeReboot(ctx context.Context) error { if nc.node == nil { @@ -255,8 +256,8 @@ func (nc *NodeConfig) SafeReboot(ctx context.Context) error { if err := drain.RunCordonOrUncordon(drainer, nc.node, true); err != nil { return fmt.Errorf("unable to cordon node %s: %w", nc.node.Name, err) } - if err := drain.RunNodeDrain(drainer, nc.node.Name); err != nil { - return fmt.Errorf("unable to drain node %s: %w", nc.node.Name, err) + if err := drain.RunNodeDrain(drainer, nc.node.GetName()); err != nil { + return fmt.Errorf("unable to drain node %s: %w", nc.node.GetName(), err) } if err := nc.Windows.RebootAndReinitialize(ctx); err != nil { @@ -403,6 +404,10 @@ func (nc *NodeConfig) generateBootstrapKubeconfig(ctx context.Context) (string, -// generateWICDKubeconfig returns the contents of a kubeconfig created from the WICD ServiceAccount +// generateWICDKubeconfig returns a WICD kubeconfig, certificate-based if available, else from the WICD ServiceAccount func (nc *NodeConfig) generateWICDKubeconfig(ctx context.Context) (string, error) { + if certKubeconfig, err := nc.generateWICDCertificateKubeconfig(ctx); err == nil { + return certKubeconfig, nil + } + wicdSASecret, err := nc.getWICDServiceAccountSecret(ctx) if err != nil { return "", err @@ -410,6 +415,39 @@ func (nc *NodeConfig) generateWICDKubeconfig(ctx context.Context) (string, error return newKubeconfigFromSecret(wicdSASecret, "wicd") } +// generateWICDCertificateKubeconfig creates a kubeconfig using certificate-based authentication +func (nc *NodeConfig) generateWICDCertificateKubeconfig(ctx context.Context) (string, error) { + if nc.node == nil { + return "", fmt.Errorf("node not available") + } + + // Check if certificate file exists on the Windows node + checkCertCmd := fmt.Sprintf("Test-Path %s", windows.WICDCurrentCertPath) + if out, err := nc.Windows.Run(checkCertCmd, true); err != nil || strings.TrimSpace(out) != "True" { + return "", fmt.Errorf("WICD certificate not found at %s", windows.WICDCurrentCertPath) + } + + // Get CA certificate from the cluster + wicdSASecret, err :=
nc.getWICDServiceAccountSecret(ctx) + if err != nil { + return "", fmt.Errorf("failed to get CA cert for certificate-based kubeconfig: %w", err) + } + + caCert := wicdSASecret.Data[core.ServiceAccountRootCAKey] + if len(caCert) == 0 { + return "", fmt.Errorf("unable to find CA cert in secret") + } + + username := fmt.Sprintf("%s:%s", rbac.WICDUserPrefix, nc.node.Name) + kc := generateCertificateKubeconfig(caCert, windows.WICDCurrentCertPath, nodeConfigCache.apiServerEndpoint, username) + + kubeconfigData, err := json.Marshal(kc) + if err != nil { + return "", err + } + return string(kubeconfigData), nil +} + // newKubeconfigFromSecret returns the contents of a kubeconfig generated from the given service account token secret func newKubeconfigFromSecret(saSecret *core.Secret, username string) (string, error) { // extract ca.crt and token data fields @@ -634,6 +672,35 @@ func generateKubeconfig(caCert []byte, token, apiServerURL, username string) cli return kubeconfig } +// generateCertificateKubeconfig creates a WICD kubeconfig using certificate-based authentication +func generateCertificateKubeconfig(caCert []byte, certPath, apiServerURL, username string) clientcmdv1.Config { + kubeconfig := clientcmdv1.Config{ + Clusters: []clientcmdv1.NamedCluster{{ + Name: "local", + Cluster: clientcmdv1.Cluster{ + Server: apiServerURL, + CertificateAuthorityData: caCert, + }}, + }, + AuthInfos: []clientcmdv1.NamedAuthInfo{{ + Name: username, + AuthInfo: clientcmdv1.AuthInfo{ + ClientCertificate: certPath, + ClientKey: certPath, + }, + }}, + Contexts: []clientcmdv1.NamedContext{{ + Name: username, + Context: clientcmdv1.Context{ + Cluster: "local", + AuthInfo: username, + }, + }}, + CurrentContext: username, + } + return kubeconfig +} + // generateKubeletConfiguration returns the configuration spec for the kubelet Windows service func generateKubeletConfiguration(clusterDNS string) kubeletconfig.KubeletConfiguration { // default numeric values chosen based on the OpenShift kubelet
config recommendations for Linux worker nodes @@ -771,8 +838,5 @@ func validWICDServiceAccountTokenSecret(secret core.Secret) bool { if secret.Type != core.SecretTypeServiceAccountToken { return false } - if secret.Annotations[core.ServiceAccountNameKey] != windows.WicdServiceName { - return false - } - return true + return secret.Annotations[core.ServiceAccountNameKey] == windows.WicdServiceName } diff --git a/pkg/rbac/rbac.go b/pkg/rbac/rbac.go new file mode 100644 index 0000000000..76716aac34 --- /dev/null +++ b/pkg/rbac/rbac.go @@ -0,0 +1,10 @@ +package rbac + +const ( + // WICDCertNamePrefix is the name prefix for WICD client certificates + WICDCertNamePrefix = "wicd-client" + // WICDGroupName is the group name for all WICD certificate users + WICDGroupName = "system:wicd-nodes" + // WICDUserPrefix is the prefix for WICD certificate usernames + WICDUserPrefix = "system:wicd-node" +) diff --git a/pkg/windows/windows.go b/pkg/windows/windows.go index 552eef1311..e30d078856 100644 --- a/pkg/windows/windows.go +++ b/pkg/windows/windows.go @@ -139,6 +139,10 @@ const ( containersFeatureName = "Containers" // WICDKubeconfigPath is the path of the kubeconfig used by WICD WICDKubeconfigPath = K8sDir + "\\wicd-kubeconfig" + // WICDCertDir is the directory for storing WICD client certificates + WICDCertDir = K8sDir + "\\wicd-certs" + // WICDCurrentCertPath is the path to the current WICD client certificate + WICDCurrentCertPath = WICDCertDir + "\\wicd-client-current.pem" // TrustedCABundlePath is the location of the trusted CA bundle file TrustedCABundlePath = K8sDir + "\\ca-bundle.crt" // GetHostnameFQDNCommand is the PowerShell command to get the FQDN hostname of the Windows instance @@ -181,6 +185,7 @@ var ( podManifestDirectory, K8sDir, TLSDir, + WICDCertDir, } ) @@ -237,7 +242,11 @@ func GetK8sDir() string { return K8sDir } +// WICDCertDuration is the default certificate lifetime for WICD, passed to the WICD service via +// its --cert-duration argument +const WICDCertDuration = "1h" + // Windows contains all the methods needed to configure a Windows VM to become a worker node type Windows
interface { // GetIPv4Address returns the IPv4 address of the associated instance. GetIPv4Address() string @@ -566,9 +575,8 @@ func (vm *windows) ConfigureWICD(watchNamespace, wicdKubeconfigContents string) if err := vm.ensureWICDFilesExist(wicdKubeconfigContents); err != nil { return err } - wicdServiceArgs := fmt.Sprintf("controller --windows-service --log-dir %s --kubeconfig %s --namespace %s", - wicdLogDir, WICDKubeconfigPath, watchNamespace) - wicdServiceArgs = fmt.Sprintf("%s --ca-bundle %s", wicdServiceArgs, TrustedCABundlePath) + wicdServiceArgs := fmt.Sprintf("controller --windows-service --log-dir %s --namespace %s --kubeconfig %s --cert-dir %s --cert-duration %s --ca-bundle %s", + wicdLogDir, watchNamespace, WICDKubeconfigPath, WICDCertDir, WICDCertDuration, TrustedCABundlePath) // if WICD crashes, attempt to restart WICD after 10, 30, and 60 seconds, and then every 2 minutes after that. // reset this counter 5 min after a period with no crashes recoveryActions := []recoveryAction{ diff --git a/test/e2e/create_test.go b/test/e2e/create_test.go index d4f9da0e78..f01e5f2734 100644 --- a/test/e2e/create_test.go +++ b/test/e2e/create_test.go @@ -66,6 +66,7 @@ func creationTestSuite(t *testing.T) { t.Run("Services running", tc.testExpectedServicesRunning) t.Run("NodeTaint validation", tc.testNodeTaint) t.Run("CSR Validation", tc.testCSRApproval) + t.Run("WICD CSR Approval", tc.testWICDCSRApproval) t.Run("Certificates", tc.testCertificates) t.Run("Node Logs", tc.testNodeLogs) t.Run("Metrics validation", tc.testMetrics) diff --git a/test/e2e/validation_test.go b/test/e2e/validation_test.go index 8cac79d606..83cb142fd5 100644 --- a/test/e2e/validation_test.go +++ b/test/e2e/validation_test.go @@ -28,7 +28,7 @@ import ( "github.com/openshift/windows-machine-config-operator/controllers" "github.com/openshift/windows-machine-config-operator/pkg/condition" "github.com/openshift/windows-machine-config-operator/pkg/crypto" -
"github.com/openshift/windows-machine-config-operator/pkg/csr" + "github.com/openshift/windows-machine-config-operator/pkg/csr/validation" "github.com/openshift/windows-machine-config-operator/pkg/metadata" nc "github.com/openshift/windows-machine-config-operator/pkg/nodeconfig" "github.com/openshift/windows-machine-config-operator/pkg/retry" @@ -722,11 +722,11 @@ func (tc *testContext) findNodeCSRs(nodeName string) ([]certificates.Certificate if c.Status.Conditions == nil || len(c.Status.Conditions) == 0 { continue } - parsedCSR, err := csr.ParseCSR(c.Spec.Request) + parsedCSR, err := validation.ParseCSR(c.Spec.Request) if err != nil { return nil, err } - dnsAddr := strings.TrimPrefix(parsedCSR.Subject.CommonName, csr.NodeUserNamePrefix) + dnsAddr := strings.TrimPrefix(parsedCSR.Subject.CommonName, validation.NodeUserNamePrefix) if dnsAddr == "" { return nil, err } diff --git a/test/e2e/wicd_rbac_test.go b/test/e2e/wicd_rbac_test.go new file mode 100644 index 0000000000..dee33646bb --- /dev/null +++ b/test/e2e/wicd_rbac_test.go @@ -0,0 +1,126 @@ +package e2e + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/require" + certificatesv1 "k8s.io/api/certificates/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/openshift/windows-machine-config-operator/pkg/csr/validation" + "github.com/openshift/windows-machine-config-operator/pkg/rbac" + "github.com/openshift/windows-machine-config-operator/pkg/windows" +) + +// testWICDCSRApproval tests that WICD CSRs created during node configuration are properly approved +func (tc *testContext) testWICDCSRApproval(t *testing.T) { + // Need at least one Windows node to run these tests, throwing error if this condition is not met + require.Greater(t, len(gc.allNodes()), 0, "test requires at least one Windows node to run") + + // Look for WICD CSRs that should have been created during node configuration + for _, node := range gc.allNodes() { + t.Run(node.Name, func(t *testing.T) { +
wicdCSRs, err := findWICDCSRs(tc, node.Name) + if err != nil { + t.Fatalf("Failed to search for WICD CSRs for node %s: %v", node.Name, err) + } + + if len(wicdCSRs) == 0 { + t.Logf("No WICD CSRs found for node %s", node.Name) + return + } + t.Logf("Found %d WICD CSR(s) for node %s", len(wicdCSRs), node.Name) + for _, csr := range wicdCSRs { + t.Run(csr.Name, func(t *testing.T) { + // Verify it was approved by WICD controller with correct reason + approvedByWICD := false + for _, condition := range csr.Status.Conditions { + if condition.Type == certificatesv1.CertificateApproved && + condition.Reason == "WICDAutoApproved" { + approvedByWICD = true + break + } + } + if !approvedByWICD { + t.Errorf("WICD CSR %s should be approved by WICD controller with reason 'WICDAutoApproved'", + csr.Name) + } + + // Verify CSR username matches expected WICD identity format + expectedCertUsername := fmt.Sprintf("%s:%s", rbac.WICDUserPrefix, node.Name) + expectedSAUsername := fmt.Sprintf("system:serviceaccount:%s:%s", wmcoNamespace, windows.WicdServiceName) + + if csr.Spec.Username != expectedCertUsername && csr.Spec.Username != expectedSAUsername { + t.Errorf("WICD CSR %s username should be '%s' or '%s', got '%s'", + csr.Name, expectedCertUsername, expectedSAUsername, csr.Spec.Username) + } + + if csr.Spec.Username == expectedCertUsername { + // Certificate-based CSRs should have WICD group + hasWICDGroup := false + for _, group := range csr.Spec.Groups { + if group == rbac.WICDGroupName { + hasWICDGroup = true + break + } + } + if !hasWICDGroup { + t.Errorf("WICD certificate CSR %s should include group '%s', got groups: %v", + csr.Name, rbac.WICDGroupName, csr.Spec.Groups) + } + } + t.Logf("WICD CSR %s validated successfully - approved by WICD controller for node %s", + csr.Name, node.Name) + }) + } + }) + } +} + +// findWICDCSRs finds CSRs that match WICD identity patterns (ServiceAccount or certificate-based) for the given node +func findWICDCSRs(testCtx *testContext, nodeName string)
([]certificatesv1.CertificateSigningRequest, error) { + var wicdCSRs []certificatesv1.CertificateSigningRequest + csrs, err := testCtx.client.K8s.CertificatesV1().CertificateSigningRequests().List(context.TODO(), + metav1.ListOptions{}) + if err != nil { + return nil, err + } + expectedCertUsername := fmt.Sprintf("%s:%s", rbac.WICDUserPrefix, nodeName) + expectedSAUsername := fmt.Sprintf("system:serviceaccount:%s:%s", wmcoNamespace, windows.WicdServiceName) + for _, csr := range csrs.Items { + // Check if CSR matches either WICD identity (certificate-based or ServiceAccount) + if csr.Spec.Username == expectedCertUsername || csr.Spec.Username == expectedSAUsername { + if isValidWICDCSR(&csr, nodeName) { + wicdCSRs = append(wicdCSRs, csr) + } + } + } + + return wicdCSRs, nil +} + +// isValidWICDCSR validates that a CSR is actually from WICD by checking the certificate content +func isValidWICDCSR(csr *certificatesv1.CertificateSigningRequest, nodeName string) bool { + // Parse the CSR to validate its content + parsedCSR, err := validation.ParseCSR(csr.Spec.Request) + if err != nil { + return false + } + + // Check if the Common Name matches the expected WICD certificate identity + expectedCommonName := fmt.Sprintf("%s:%s", rbac.WICDUserPrefix, nodeName) + if parsedCSR.Subject.CommonName != expectedCommonName { + return false + } + + // Check if the Organization includes the WICD group + for _, org := range parsedCSR.Subject.Organization { + if org == rbac.WICDGroupName { + return true + } + } + + return false +} diff --git a/vendor/k8s.io/client-go/tools/watch/informerwatcher.go b/vendor/k8s.io/client-go/tools/watch/informerwatcher.go new file mode 100644 index 0000000000..114abfcc9b --- /dev/null +++ b/vendor/k8s.io/client-go/tools/watch/informerwatcher.go @@ -0,0 +1,166 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package watch + +import ( + "sync" + + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" +) + +func newEventProcessor(out chan<- watch.Event) *eventProcessor { + return &eventProcessor{ + out: out, + cond: sync.NewCond(&sync.Mutex{}), + done: make(chan struct{}), + } +} + +// eventProcessor buffers events and writes them to an out chan when a reader +// is waiting. Because of the requirement to buffer events, it synchronizes +// input with a condition, and synchronizes output with a channels. It needs to +// be able to yield while both waiting on an input condition and while blocked +// on writing to the output channel. 
+type eventProcessor struct { + out chan<- watch.Event + + cond *sync.Cond + buff []watch.Event + + done chan struct{} +} + +func (e *eventProcessor) run() { + for { + batch := e.takeBatch() + e.writeBatch(batch) + if e.stopped() { + return + } + } +} + +func (e *eventProcessor) takeBatch() []watch.Event { + e.cond.L.Lock() + defer e.cond.L.Unlock() + + for len(e.buff) == 0 && !e.stopped() { + e.cond.Wait() + } + + batch := e.buff + e.buff = nil + return batch +} + +func (e *eventProcessor) writeBatch(events []watch.Event) { + for _, event := range events { + select { + case e.out <- event: + case <-e.done: + return + } + } +} + +func (e *eventProcessor) push(event watch.Event) { + e.cond.L.Lock() + defer e.cond.L.Unlock() + defer e.cond.Signal() + e.buff = append(e.buff, event) +} + +func (e *eventProcessor) stopped() bool { + select { + case <-e.done: + return true + default: + return false + } +} + +func (e *eventProcessor) stop() { + close(e.done) + e.cond.Signal() +} + +// NewIndexerInformerWatcher will create an IndexerInformer and wrap it into watch.Interface +// so you can use it anywhere where you'd have used a regular Watcher returned from Watch method. +// it also returns a channel you can use to wait for the informers to fully shutdown. +// +// Contextual logging: NewIndexerInformerWatcherWithLogger should be used instead of NewIndexerInformerWatcher in code which supports contextual logging. +func NewIndexerInformerWatcher(lw cache.ListerWatcher, objType runtime.Object) (cache.Indexer, cache.Controller, watch.Interface, <-chan struct{}) { + return NewIndexerInformerWatcherWithLogger(klog.Background(), lw, objType) +} + +// NewIndexerInformerWatcherWithLogger will create an IndexerInformer and wrap it into watch.Interface +// so you can use it anywhere where you'd have used a regular Watcher returned from Watch method. +// it also returns a channel you can use to wait for the informers to fully shutdown. 
+func NewIndexerInformerWatcherWithLogger(logger klog.Logger, lw cache.ListerWatcher, objType runtime.Object) (cache.Indexer, cache.Controller, watch.Interface, <-chan struct{}) { + ch := make(chan watch.Event) + w := watch.NewProxyWatcher(ch) + e := newEventProcessor(ch) + + indexer, informer := cache.NewIndexerInformer(lw, objType, 0, cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + e.push(watch.Event{ + Type: watch.Added, + Object: obj.(runtime.Object), + }) + }, + UpdateFunc: func(old, new interface{}) { + e.push(watch.Event{ + Type: watch.Modified, + Object: new.(runtime.Object), + }) + }, + DeleteFunc: func(obj interface{}) { + staleObj, stale := obj.(cache.DeletedFinalStateUnknown) + if stale { + // We have no means of passing the additional information down using + // watch API based on watch.Event but the caller can filter such + // objects by checking if metadata.deletionTimestamp is set + obj = staleObj.Obj + } + + e.push(watch.Event{ + Type: watch.Deleted, + Object: obj.(runtime.Object), + }) + }, + }, cache.Indexers{}) + + // This will get stopped, but without waiting for it. + go e.run() + + doneCh := make(chan struct{}) + go func() { + defer close(doneCh) + defer e.stop() + // Waiting for w.StopChan() is the traditional behavior which gets + // preserved here, with the logger added to support contextual logging. + ctx := wait.ContextForChannel(w.StopChan()) + ctx = klog.NewContext(ctx, logger) + informer.RunWithContext(ctx) + }() + + return indexer, informer, w, doneCh +} diff --git a/vendor/k8s.io/client-go/tools/watch/retrywatcher.go b/vendor/k8s.io/client-go/tools/watch/retrywatcher.go new file mode 100644 index 0000000000..45249d8e47 --- /dev/null +++ b/vendor/k8s.io/client-go/tools/watch/retrywatcher.go @@ -0,0 +1,327 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package watch + +import ( + "context" + "errors" + "fmt" + "io" + "net/http" + "time" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/dump" + "k8s.io/apimachinery/pkg/util/net" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" +) + +// resourceVersionGetter is an interface used to get resource version from events. +// We can't reuse an interface from meta otherwise it would be a cyclic dependency and we need just this one method +type resourceVersionGetter interface { + GetResourceVersion() string +} + +// RetryWatcher will make sure that in case the underlying watcher is closed (e.g. due to API timeout or etcd timeout) +// it will get restarted from the last point without the consumer even knowing about it. +// RetryWatcher does that by inspecting events and keeping track of resourceVersion. +// Especially useful when using watch.UntilWithoutRetry where premature termination is causing issues and flakes. +// Please note that this is not resilient to etcd cache not having the resource version anymore - you would need to +// use Informers for that. +type RetryWatcher struct { + cancel func(error) + lastResourceVersion string + watcherClient cache.WatcherWithContext + resultChan chan watch.Event + doneChan chan struct{} + minRestartDelay time.Duration +} + +// NewRetryWatcher creates a new RetryWatcher. +// It will make sure that watches gets restarted in case of recoverable errors. 
+// The initialResourceVersion will be given to watch method when first called. +// +// Deprecated: use NewRetryWatcherWithContext instead. +func NewRetryWatcher(initialResourceVersion string, watcherClient cache.Watcher) (*RetryWatcher, error) { + return NewRetryWatcherWithContext(context.Background(), initialResourceVersion, cache.ToWatcherWithContext(watcherClient)) +} + +// NewRetryWatcherWithContext creates a new RetryWatcher. +// It will make sure that watches gets restarted in case of recoverable errors. +// The initialResourceVersion will be given to watch method when first called. +func NewRetryWatcherWithContext(ctx context.Context, initialResourceVersion string, watcherClient cache.WatcherWithContext) (*RetryWatcher, error) { + return newRetryWatcher(ctx, initialResourceVersion, watcherClient, 1*time.Second) +} + +func newRetryWatcher(ctx context.Context, initialResourceVersion string, watcherClient cache.WatcherWithContext, minRestartDelay time.Duration) (*RetryWatcher, error) { + switch initialResourceVersion { + case "", "0": + // TODO: revisit this if we ever get WATCH v2 where it means start "now" + // without doing the synthetic list of objects at the beginning (see #74022) + return nil, fmt.Errorf("initial RV %q is not supported due to issues with underlying WATCH", initialResourceVersion) + default: + break + } + + ctx, cancel := context.WithCancelCause(ctx) + + rw := &RetryWatcher{ + cancel: cancel, + lastResourceVersion: initialResourceVersion, + watcherClient: watcherClient, + doneChan: make(chan struct{}), + resultChan: make(chan watch.Event, 0), + minRestartDelay: minRestartDelay, + } + + go rw.receive(ctx) + return rw, nil +} + +func (rw *RetryWatcher) send(ctx context.Context, event watch.Event) bool { + // Writing to an unbuffered channel is blocking operation + // and we need to check if stop wasn't requested while doing so. 
+ select { + case rw.resultChan <- event: + return true + case <-ctx.Done(): + return false + } +} + +// doReceive returns true when it is done, false otherwise. +// If it is not done the second return value holds the time to wait before calling it again. +func (rw *RetryWatcher) doReceive(ctx context.Context) (bool, time.Duration) { + watcher, err := rw.watcherClient.WatchWithContext(ctx, metav1.ListOptions{ + ResourceVersion: rw.lastResourceVersion, + AllowWatchBookmarks: true, + }) + // We are very unlikely to hit EOF here since we are just establishing the call, + // but it may happen that the apiserver is just shutting down (e.g. being restarted) + // This is consistent with how it is handled for informers + switch err { + case nil: + break + + case io.EOF: + // watch closed normally + return false, 0 + + case io.ErrUnexpectedEOF: + klog.FromContext(ctx).V(1).Info("Watch closed with unexpected EOF", "err", err) + return false, 0 + + default: + msg := "Watch failed" + if net.IsProbableEOF(err) || net.IsTimeout(err) { + klog.FromContext(ctx).V(5).Info(msg, "err", err) + // Retry + return false, 0 + } + + // Check if the watch failed due to the client not having permission to watch the resource or the credentials + // being invalid (e.g. expired token). + if apierrors.IsForbidden(err) || apierrors.IsUnauthorized(err) { + // Add more detail since the forbidden message returned by the Kubernetes API is just "unknown". + klog.FromContext(ctx).Error(err, msg+": ensure the client has valid credentials and watch permissions on the resource") + + if apiStatus, ok := err.(apierrors.APIStatus); ok { + statusErr := apiStatus.Status() + + sent := rw.send(ctx, watch.Event{ + Type: watch.Error, + Object: &statusErr, + }) + if !sent { + // This likely means the RetryWatcher is stopping but return false so the caller to doReceive can + // verify this and potentially retry. 
+ klog.FromContext(ctx).Error(nil, "Failed to send the Unauthorized or Forbidden watch event") + + return false, 0 + } + } else { + // This should never happen since apierrors only handles apierrors.APIStatus. Still, this is an + // unrecoverable error, so still allow it to return true below. + klog.FromContext(ctx).Error(err, msg+": encountered an unexpected Unauthorized or Forbidden error type") + } + + return true, 0 + } + + klog.FromContext(ctx).Error(err, msg) + // Retry + return false, 0 + } + + if watcher == nil { + klog.FromContext(ctx).Error(nil, "Watch returned nil watcher") + // Retry + return false, 0 + } + + ch := watcher.ResultChan() + defer watcher.Stop() + + for { + select { + case <-ctx.Done(): + klog.FromContext(ctx).V(4).Info("Stopping RetryWatcher") + return true, 0 + case event, ok := <-ch: + if !ok { + klog.FromContext(ctx).V(4).Info("Failed to get event - re-creating the watcher", "resourceVersion", rw.lastResourceVersion) + return false, 0 + } + + // We need to inspect the event and get ResourceVersion out of it + switch event.Type { + case watch.Added, watch.Modified, watch.Deleted, watch.Bookmark: + metaObject, ok := event.Object.(resourceVersionGetter) + if !ok { + _ = rw.send(ctx, watch.Event{ + Type: watch.Error, + Object: &apierrors.NewInternalError(errors.New("retryWatcher: doesn't support resourceVersion")).ErrStatus, + }) + // We have to abort here because this might cause lastResourceVersion inconsistency by skipping a potential RV with valid data! + return true, 0 + } + + resourceVersion := metaObject.GetResourceVersion() + if resourceVersion == "" { + _ = rw.send(ctx, watch.Event{ + Type: watch.Error, + Object: &apierrors.NewInternalError(fmt.Errorf("retryWatcher: object %#v doesn't support resourceVersion", event.Object)).ErrStatus, + }) + // We have to abort here because this might cause lastResourceVersion inconsistency by skipping a potential RV with valid data! 
+ return true, 0 + } + + // All is fine; send the non-bookmark events and update resource version. + if event.Type != watch.Bookmark { + ok = rw.send(ctx, event) + if !ok { + return true, 0 + } + } + rw.lastResourceVersion = resourceVersion + + continue + + case watch.Error: + // This round trip allows us to handle unstructured status + errObject := apierrors.FromObject(event.Object) + statusErr, ok := errObject.(*apierrors.StatusError) + if !ok { + klog.FromContext(ctx).Error(nil, "Received an error which is not *metav1.Status", "errorObject", dump.Pretty(event.Object)) + // Retry unknown errors + return false, 0 + } + + status := statusErr.ErrStatus + + statusDelay := time.Duration(0) + if status.Details != nil { + statusDelay = time.Duration(status.Details.RetryAfterSeconds) * time.Second + } + + switch status.Code { + case http.StatusGone: + // Never retry RV too old errors + _ = rw.send(ctx, event) + return true, 0 + + case http.StatusGatewayTimeout, http.StatusInternalServerError: + // Retry + return false, statusDelay + + default: + // We retry by default. RetryWatcher is meant to proceed unless it is certain + // that it can't. If we are not certain, we proceed with retry and leave it + // up to the user to timeout if needed. + + // Log here so we have a record of hitting the unexpected error + // and we can whitelist some error codes if we missed any that are expected. + klog.FromContext(ctx).V(5).Info("Retrying after unexpected error", "errorObject", dump.Pretty(event.Object)) + + // Retry + return false, statusDelay + } + + default: + klog.FromContext(ctx).Error(nil, "Failed to recognize event", "type", event.Type) + _ = rw.send(ctx, watch.Event{ + Type: watch.Error, + Object: &apierrors.NewInternalError(fmt.Errorf("retryWatcher failed to recognize Event type %q", event.Type)).ErrStatus, + }) + // We are unable to restart the watch and have to stop the loop or this might cause lastResourceVersion inconsistency by skipping a potential RV with valid data! 
+ return true, 0 + } + } + } +} + +// receive reads the result from a watcher, restarting it if necessary. +func (rw *RetryWatcher) receive(ctx context.Context) { + defer close(rw.doneChan) + defer close(rw.resultChan) + + logger := klog.FromContext(ctx) + logger.V(4).Info("Starting RetryWatcher") + defer logger.V(4).Info("Stopping RetryWatcher") + + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + // We use non sliding until so we don't introduce delays on happy path when WATCH call + // timeouts or gets closed and we need to reestablish it while also avoiding hot loops. + wait.NonSlidingUntilWithContext(ctx, func(ctx context.Context) { + done, retryAfter := rw.doReceive(ctx) + if done { + cancel() + return + } + + timer := time.NewTimer(retryAfter) + select { + case <-ctx.Done(): + timer.Stop() + return + case <-timer.C: + } + + logger.V(4).Info("Restarting RetryWatcher", "resourceVersion", rw.lastResourceVersion) + }, rw.minRestartDelay) +} + +// ResultChan implements Interface. +func (rw *RetryWatcher) ResultChan() <-chan watch.Event { + return rw.resultChan +} + +// Stop implements Interface. +func (rw *RetryWatcher) Stop() { + rw.cancel(errors.New("asked to stop")) +} + +// Done allows the caller to be notified when Retry watcher stops. +func (rw *RetryWatcher) Done() <-chan struct{} { + return rw.doneChan +} diff --git a/vendor/k8s.io/client-go/tools/watch/until.go b/vendor/k8s.io/client-go/tools/watch/until.go new file mode 100644 index 0000000000..03ceaf002d --- /dev/null +++ b/vendor/k8s.io/client-go/tools/watch/until.go @@ -0,0 +1,168 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package watch + +import ( + "context" + "errors" + "fmt" + "time" + + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" +) + +// PreconditionFunc returns true if the condition has been reached, false if it has not been reached yet, +// or an error if the condition failed or detected an error state. +type PreconditionFunc func(store cache.Store) (bool, error) + +// ConditionFunc returns true if the condition has been reached, false if it has not been reached yet, +// or an error if the condition cannot be checked and should terminate. In general, it is better to define +// level driven conditions over edge driven conditions (pod has ready=true, vs pod modified and ready changed +// from false to true). +type ConditionFunc func(event watch.Event) (bool, error) + +// ErrWatchClosed is returned when the watch channel is closed before timeout in UntilWithoutRetry. +var ErrWatchClosed = errors.New("watch closed before UntilWithoutRetry timeout") + +// UntilWithoutRetry reads items from the watch until each provided condition succeeds, and then returns the last watch +// encountered. The first condition that returns an error terminates the watch (and the event is also returned). +// If no event has been received, the returned event will be nil. +// Conditions are satisfied sequentially so as to provide a useful primitive for higher level composition. +// Waits until context deadline or until context is canceled. 
+// +// Warning: Unless you have a very specific use case (probably a special Watcher) don't use this function!!! +// Warning: This will fail e.g. on API timeouts and/or 'too old resource version' error. +// Warning: You are most probably looking for a function *Until* or *UntilWithSync* below, +// Warning: solving such issues. +// TODO: Consider making this function private to prevent misuse when the other occurrences in our codebase are gone. +func UntilWithoutRetry(ctx context.Context, watcher watch.Interface, conditions ...ConditionFunc) (*watch.Event, error) { + ch := watcher.ResultChan() + defer watcher.Stop() + var lastEvent *watch.Event + for _, condition := range conditions { + // check the next condition against the previous event and short circuit waiting for the next watch + if lastEvent != nil { + done, err := condition(*lastEvent) + if err != nil { + return lastEvent, err + } + if done { + continue + } + } + ConditionSucceeded: + for { + select { + case event, ok := <-ch: + if !ok { + return lastEvent, ErrWatchClosed + } + lastEvent = &event + + done, err := condition(event) + if err != nil { + return lastEvent, err + } + if done { + break ConditionSucceeded + } + + case <-ctx.Done(): + return lastEvent, wait.ErrWaitTimeout + } + } + } + return lastEvent, nil +} + +// Until wraps the watcherClient's watch function with RetryWatcher making sure that watcher gets restarted in case of errors. +// The initialResourceVersion will be given to watch method when first called. It shall not be "" or "0" +// given the underlying WATCH call issues (#74022). +// Remaining behaviour is identical to function UntilWithoutRetry. (See above.) +// Until can deal with API timeouts and lost connections. +// It guarantees you to see all events and in the order they happened. +// Due to this guarantee there is no way it can deal with 'Resource version too old error'. It will fail in this case. 
+// (See `UntilWithSync` if you'd prefer to recover from all the errors including RV too old by re-listing +// those items. In normal code you should care about being level driven so you'd not care about not seeing all the edges.) +// +// The most frequent usage for Until would be a test where you want to verify exact order of events ("edges"). +func Until(ctx context.Context, initialResourceVersion string, watcherClient cache.Watcher, conditions ...ConditionFunc) (*watch.Event, error) { + w, err := NewRetryWatcherWithContext(ctx, initialResourceVersion, cache.ToWatcherWithContext(watcherClient)) + if err != nil { + return nil, err + } + + return UntilWithoutRetry(ctx, w, conditions...) +} + +// UntilWithSync creates an informer from lw, optionally checks precondition when the store is synced, +// and watches the output until each provided condition succeeds, in a way that is identical +// to function UntilWithoutRetry. (See above.) +// UntilWithSync can deal with all errors like API timeout, lost connections and 'Resource version too old'. +// It is the only function that can recover from 'Resource version too old', Until and UntilWithoutRetry will +// just fail in that case. On the other hand it can't provide you with guarantees as strong as using simple +// Watch method with Until. It can skip some intermediate events in case of watch function failing but it will +// re-list to recover and you always get an event, if there has been a change, after recovery. +// Also with the current implementation based on DeltaFIFO, order of the events you receive is guaranteed only for +// particular object, not between more of them even it's the same resource. +// The most frequent usage would be a command that needs to watch the "state of the world" and should't fail, like: +// waiting for object reaching a state, "small" controllers, ... 
+func UntilWithSync(ctx context.Context, lw cache.ListerWatcher, objType runtime.Object, precondition PreconditionFunc, conditions ...ConditionFunc) (*watch.Event, error) { + indexer, informer, watcher, done := NewIndexerInformerWatcherWithLogger(klog.FromContext(ctx), lw, objType) + // We need to wait for the internal informers to fully stop so it's easier to reason about + // and it works with non-thread safe clients. + defer func() { <-done }() + // Proxy watcher can be stopped multiple times so it's fine to use defer here to cover alternative branches and + // let UntilWithoutRetry to stop it + defer watcher.Stop() + + if precondition != nil { + if !cache.WaitForCacheSync(ctx.Done(), informer.HasSynced) { + return nil, fmt.Errorf("UntilWithSync: unable to sync caches: %w", ctx.Err()) + } + + done, err := precondition(indexer) + if err != nil { + return nil, err + } + + if done { + return nil, nil + } + } + + return UntilWithoutRetry(ctx, watcher, conditions...) +} + +// ContextWithOptionalTimeout wraps context.WithTimeout and handles infinite timeouts expressed as 0 duration. 
+func ContextWithOptionalTimeout(parent context.Context, timeout time.Duration) (context.Context, context.CancelFunc) { + if timeout < 0 { + // This should be handled in validation + klog.FromContext(parent).Error(nil, "Timeout for context shall not be negative") + timeout = 0 + } + + if timeout == 0 { + return context.WithCancel(parent) + } + + return context.WithTimeout(parent, timeout) +} diff --git a/vendor/k8s.io/client-go/util/certificate/OWNERS b/vendor/k8s.io/client-go/util/certificate/OWNERS new file mode 100644 index 0000000000..3c3b94c58c --- /dev/null +++ b/vendor/k8s.io/client-go/util/certificate/OWNERS @@ -0,0 +1,8 @@ +# See the OWNERS docs at https://go.k8s.io/owners + +approvers: + - sig-auth-certificates-approvers +reviewers: + - sig-auth-certificates-reviewers +labels: + - sig/auth diff --git a/vendor/k8s.io/client-go/util/certificate/certificate_manager.go b/vendor/k8s.io/client-go/util/certificate/certificate_manager.go new file mode 100644 index 0000000000..cda9dfe47d --- /dev/null +++ b/vendor/k8s.io/client-go/util/certificate/certificate_manager.go @@ -0,0 +1,809 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/

package certificate

import (
	"context"
	"crypto/ecdsa"
	"crypto/elliptic"
	cryptorand "crypto/rand"
	"crypto/rsa"
	"crypto/tls"
	"crypto/x509"
	"encoding/pem"
	"errors"
	"fmt"
	"reflect"
	"sync"
	"time"

	"k8s.io/klog/v2"

	certificates "k8s.io/api/certificates/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/wait"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/util/cert"
	"k8s.io/client-go/util/certificate/csr"
	"k8s.io/client-go/util/keyutil"
)

var (
	// certificateWaitTimeout controls the amount of time we wait for certificate
	// approval in one iteration.
	certificateWaitTimeout = 15 * time.Minute

	kubeletServingUsagesWithEncipherment = []certificates.KeyUsage{
		// https://tools.ietf.org/html/rfc5280#section-4.2.1.3
		//
		// Digital signature allows the certificate to be used to verify
		// digital signatures used during TLS negotiation.
		certificates.UsageDigitalSignature,
		// KeyEncipherment allows the cert/key pair to be used to encrypt
		// keys, including the symmetric keys negotiated during TLS setup
		// and used for data transfer.
		certificates.UsageKeyEncipherment,
		// ServerAuth allows the cert to be used by a TLS server to
		// authenticate itself to a TLS client.
		certificates.UsageServerAuth,
	}
	kubeletServingUsagesNoEncipherment = []certificates.KeyUsage{
		// https://tools.ietf.org/html/rfc5280#section-4.2.1.3
		//
		// Digital signature allows the certificate to be used to verify
		// digital signatures used during TLS negotiation.
		certificates.UsageDigitalSignature,
		// ServerAuth allows the cert to be used by a TLS server to
		// authenticate itself to a TLS client.
		certificates.UsageServerAuth,
	}
	// DefaultKubeletServingGetUsages returns the serving usages for the given
	// private key: the set including key encipherment for an *rsa.PrivateKey,
	// and the set without it for any other key type.
	DefaultKubeletServingGetUsages = func(privateKey interface{}) []certificates.KeyUsage {
		switch privateKey.(type) {
		case *rsa.PrivateKey:
			return kubeletServingUsagesWithEncipherment
		default:
			return kubeletServingUsagesNoEncipherment
		}
	}
	kubeletClientUsagesWithEncipherment = []certificates.KeyUsage{
		// https://tools.ietf.org/html/rfc5280#section-4.2.1.3
		//
		// Digital signature allows the certificate to be used to verify
		// digital signatures used during TLS negotiation.
		certificates.UsageDigitalSignature,
		// KeyEncipherment allows the cert/key pair to be used to encrypt
		// keys, including the symmetric keys negotiated during TLS setup
		// and used for data transfer.
		certificates.UsageKeyEncipherment,
		// ClientAuth allows the cert to be used by a TLS client to
		// authenticate itself to the TLS server.
		certificates.UsageClientAuth,
	}
	kubeletClientUsagesNoEncipherment = []certificates.KeyUsage{
		// https://tools.ietf.org/html/rfc5280#section-4.2.1.3
		//
		// Digital signature allows the certificate to be used to verify
		// digital signatures used during TLS negotiation.
		certificates.UsageDigitalSignature,
		// ClientAuth allows the cert to be used by a TLS client to
		// authenticate itself to the TLS server.
		certificates.UsageClientAuth,
	}
	// DefaultKubeletClientGetUsages returns the client usages for the given
	// private key: the set including key encipherment for an *rsa.PrivateKey,
	// and the set without it for any other key type.
	DefaultKubeletClientGetUsages = func(privateKey interface{}) []certificates.KeyUsage {
		switch privateKey.(type) {
		case *rsa.PrivateKey:
			return kubeletClientUsagesWithEncipherment
		default:
			return kubeletClientUsagesNoEncipherment
		}
	}
)

// Manager maintains and updates the certificates in use by this certificate
// manager. In the background it communicates with the API server to get new
// certificates for certificates about to expire.
type Manager interface {
	// Start the API server status sync loop.
	Start()
	// Stop the cert manager loop.
	Stop()
	// Current returns the currently selected certificate from the
	// certificate manager, as well as the associated certificate and key data
	// in PEM format.
	Current() *tls.Certificate
	// ServerHealthy returns true if the manager is able to communicate with
	// the server. This allows a caller to determine whether the cert manager
	// thinks it can potentially talk to the API server. The cert manager may
	// be very conservative and only return true if recent communication has
	// occurred with the server.
	ServerHealthy() bool
}

// Config is the set of configuration parameters available for a new Manager.
type Config struct {
	// ClientsetFn will be used to create a clientset for
	// creating/fetching new certificate requests generated when a key rotation occurs.
	// The function will never be invoked in parallel.
	// It is passed the current client certificate if one exists.
	ClientsetFn ClientsetFunc
	// Template is the CertificateRequest that will be used as a template for
	// generating certificate signing requests for all new keys generated as
	// part of rotation. It follows the same rules as the template parameter of
	// crypto.x509.CreateCertificateRequest in the Go standard libraries.
	Template *x509.CertificateRequest
	// GetTemplate returns the CertificateRequest that will be used as a template for
	// generating certificate signing requests for all new keys generated as
	// part of rotation. It follows the same rules as the template parameter of
	// crypto.x509.CreateCertificateRequest in the Go standard libraries.
	// If no template is available, nil may be returned, and no certificate will be requested.
	// If specified, takes precedence over Template.
	GetTemplate func() *x509.CertificateRequest
	// SignerName is the name of the certificate signer that should sign certificates
	// generated by the manager.
	SignerName string
	// RequestedCertificateLifetime is the requested lifetime length for certificates generated by the manager.
	// Optional.
	// This will set the spec.expirationSeconds field on the CSR. Controlling the lifetime of
	// the issued certificate is not guaranteed as the signer may choose to ignore the request.
	RequestedCertificateLifetime *time.Duration
	// Usages is the types of usages that certificates generated by the manager
	// can be used for. It is mutually exclusive with GetUsages.
	Usages []certificates.KeyUsage
	// GetUsages is dynamic way to get the types of usages that certificates generated by the manager
	// can be used for. If Usages is not nil, GetUsages has to be nil, vice versa.
	// It is mutually exclusive with Usages.
	GetUsages func(privateKey interface{}) []certificates.KeyUsage
	// CertificateStore is a persistent store where the current cert/key is
	// kept and future cert/key pairs will be persisted after they are
	// generated.
	CertificateStore Store
	// BootstrapCertificatePEM is the certificate data that will be returned
	// from the Manager if the CertificateStore doesn't have any cert/key pairs
	// currently available and has not yet had a chance to get a new cert/key
	// pair from the API. If the CertificateStore does have a cert/key pair,
	// this will be ignored. If there is no cert/key pair available in the
	// CertificateStore, as soon as Start is called, it will request a new
	// cert/key pair from the CertificateSigningRequestClient. This is intended
	// to allow the first boot of a component to be initialized using a
	// generic, multi-use cert/key pair which will be quickly replaced with a
	// unique cert/key pair.
	BootstrapCertificatePEM []byte
	// BootstrapKeyPEM is the key data that will be returned from the Manager
	// if the CertificateStore doesn't have any cert/key pairs currently
	// available. If the CertificateStore does have a cert/key pair, this will
	// be ignored. If the bootstrap cert/key pair are used, they will be
	// rotated at the first opportunity, possibly well in advance of expiring.
	// This is intended to allow the first boot of a component to be
	// initialized using a generic, multi-use cert/key pair which will be
	// quickly replaced with a unique cert/key pair.
	BootstrapKeyPEM []byte `datapolicy:"security-key"`
	// CertificateRotation will record a metric showing the time in seconds
	// that certificates lived before being rotated. This metric is a histogram
	// because there is value in keeping a history of rotation cadences. It
	// allows one to setup monitoring and alerting of unexpected rotation
	// behavior and track trends in rotation frequency.
	CertificateRotation Histogram
	// CertificateRenewFailure will record a metric that keeps track of
	// certificate renewal failures.
	CertificateRenewFailure Counter
	// Name is an optional string that will be used when writing log output
	// via logger.WithName or returning errors from manager methods.
	//
	// If not set, SignerName will
	// be used, if SignerName is not set, if Usages includes client auth the
	// name will be "client auth", otherwise the value will be "certificate".
	Name string
	// Ctx is an optional context. Cancelling it is equivalent to
	// calling Stop. A logger is extracted from it if non-nil, otherwise
	// klog.Background() is used.
	Ctx *context.Context
}

// Store is responsible for getting and updating the current certificate.
// Depending on the concrete implementation, the backing store for this
// behavior may vary.
type Store interface {
	// Current returns the currently selected certificate, as well as the
	// associated certificate and key data in PEM format. If the Store doesn't
	// have a cert/key pair currently, it should return a NoCertKeyError so
	// that the Manager can recover by using bootstrap certificates to request
	// a new cert/key pair.
	Current() (*tls.Certificate, error)
	// Update accepts the PEM data for the cert/key pair and makes the new
	// cert/key pair the 'current' pair, that will be returned by future calls
	// to Current().
	Update(cert, key []byte) (*tls.Certificate, error)
}

// Gauge will record the remaining lifetime of the certificate each time it is
// updated.
type Gauge interface {
	Set(float64)
}

// Histogram will record the time a rotated certificate was used before being
// rotated.
type Histogram interface {
	Observe(float64)
}

// Counter will wrap a counter with labels
type Counter interface {
	Inc()
}

// NoCertKeyError indicates there is no cert/key currently available.
// Its Error method has a pointer receiver, so it is *NoCertKeyError that
// satisfies the error interface.
type NoCertKeyError string

// ClientsetFunc returns a new clientset for discovering CSR API availability and requesting CSRs.
// It is passed the current certificate if one is available and valid.
type ClientsetFunc func(current *tls.Certificate) (clientset.Interface, error)

// Error implements the error interface by returning the underlying string.
func (e *NoCertKeyError) Error() string { return string(*e) }

// manager is the Manager implementation returned by NewManager.
type manager struct {
	getTemplate func() *x509.CertificateRequest

	// lastRequestLock guards lastRequestCancel and lastRequest
	lastRequestLock   sync.Mutex
	lastRequestCancel context.CancelFunc
	lastRequest       *x509.CertificateRequest

	// dynamicTemplate is true when Config.GetTemplate was supplied.
	dynamicTemplate              bool
	signerName                   string
	requestedCertificateLifetime *time.Duration
	getUsages                    func(privateKey interface{}) []certificates.KeyUsage
	// forceRotation makes the next rotation deadline "now"; it is read and
	// cleared without holding a lock.
	forceRotation bool

	certStore Store

	certificateRotation     Histogram
	certificateRenewFailure Counter

	// the following variables must only be accessed under certAccessLock
	certAccessLock sync.RWMutex
	cert           *tls.Certificate
	serverHealth   bool

	// Context and cancel function for background goroutines.
	ctx    context.Context
	cancel func(err error)

	// the clientFn must only be accessed under the clientAccessLock
	clientAccessLock sync.Mutex
	clientsetFn      ClientsetFunc

	// Set to time.Now but can be stubbed out for testing
	now func() time.Time
}

// NewManager returns a new certificate manager. A certificate manager is
// responsible for being the authoritative source of certificates in the
// Kubelet and handling updates due to rotation.
//
// Exactly one of config.GetUsages and config.Usages must be set; an error is
// returned otherwise, or when the initial certificate cannot be loaded from
// the store or the bootstrap data.
func NewManager(config *Config) (Manager, error) {

	// Fall back to the static template when no dynamic template getter is configured.
	getTemplate := config.GetTemplate
	if getTemplate == nil {
		getTemplate = func() *x509.CertificateRequest { return config.Template }
	}

	if config.GetUsages != nil && config.Usages != nil {
		return nil, errors.New("cannot specify both GetUsages and Usages")
	}
	if config.GetUsages == nil && config.Usages == nil {
		return nil, errors.New("either GetUsages or Usages should be specified")
	}
	var getUsages func(interface{}) []certificates.KeyUsage
	if config.GetUsages != nil {
		getUsages = config.GetUsages
	} else {
		getUsages = func(interface{}) []certificates.KeyUsage { return config.Usages }
	}
	m := manager{
		clientsetFn:                  config.ClientsetFn,
		getTemplate:                  getTemplate,
		dynamicTemplate:              config.GetTemplate != nil,
		signerName:                   config.SignerName,
		requestedCertificateLifetime: config.RequestedCertificateLifetime,
		getUsages:                    getUsages,
		certStore:                    config.CertificateStore,
		certificateRotation:          config.CertificateRotation,
		certificateRenewFailure:      config.CertificateRenewFailure,
		now:                          time.Now,
	}

	// Determine the name that is to be included in log output from this manager instance.
	name := config.Name
	if len(name) == 0 {
		name = m.signerName
	}
	if len(name) == 0 {
		usages := getUsages(nil)
		switch {
		case hasKeyUsage(usages, certificates.UsageClientAuth):
			name = string(certificates.UsageClientAuth)
		default:
			name = "certificate"
		}
	}

	// The name gets included through contextual logging.
	logger := klog.Background()
	if config.Ctx != nil {
		logger = klog.FromContext(*config.Ctx)
	}
	logger = klog.LoggerWithName(logger, name)

	cert, forceRotation, err := getCurrentCertificateOrBootstrap(
		logger,
		config.CertificateStore,
		config.BootstrapCertificatePEM,
		config.BootstrapKeyPEM)
	if err != nil {
		return nil, err
	}
	m.cert, m.forceRotation = cert, forceRotation

	// cancel will be called by Stop, ctx.Done is our stop channel.
	m.ctx, m.cancel = context.WithCancelCause(context.Background())
	if config.Ctx != nil && (*config.Ctx).Done() != nil {
		ctx := *config.Ctx
		// If we have been passed a context and it has a Done channel, then
		// we need to map its cancellation to our Stop method.
		go func() {
			<-ctx.Done()
			m.Stop()
		}()
	}
	m.ctx = klog.NewContext(m.ctx, logger)

	return &m, nil
}

// Current returns the currently selected certificate from the certificate
// manager. This can be nil if the manager was initialized without a
// certificate and has not yet received one from the
// CertificateSigningRequestClient, or if the current cert has expired.
func (m *manager) Current() *tls.Certificate {
	m.certAccessLock.RLock()
	defer m.certAccessLock.RUnlock()
	// Expiry can only be evaluated when the parsed leaf certificate is present.
	if m.cert != nil && m.cert.Leaf != nil && m.now().After(m.cert.Leaf.NotAfter) {
		klog.FromContext(m.ctx).V(2).Info("Current certificate is expired")
		return nil
	}
	return m.cert
}

// ServerHealthy returns true if the cert manager believes the server
// is currently alive.
func (m *manager) ServerHealthy() bool {
	m.certAccessLock.RLock()
	defer m.certAccessLock.RUnlock()
	return m.serverHealth
}

// Stop terminates the manager.
// It cancels the manager's context with an "asked to stop" cause.
func (m *manager) Stop() {
	m.cancel(errors.New("asked to stop"))
}

// Start will start the background work of rotating the certificates.
// It returns immediately; the work happens in a goroutine executing run.
func (m *manager) Start() {
	go m.run()
}

// run, in contrast to Start, blocks while the manager is running.
// It waits for all goroutines to stop.
func (m *manager) run() {
	logger := klog.FromContext(m.ctx)
	// Certificate rotation depends on access to the API server certificate
	// signing API, so don't start the certificate manager if we don't have a
	// client.
	if m.clientsetFn == nil {
		logger.V(2).Info("Certificate rotation is not enabled, no connection to the apiserver")
		return
	}
	logger.V(2).Info("Certificate rotation is enabled")

	// run only returns once every goroutine started below has finished.
	var wg sync.WaitGroup
	defer wg.Wait()

	// templateChanged is unbuffered: the template goroutine hands a signal
	// directly to a rotate call that is sleeping until its deadline.
	templateChanged := make(chan struct{})
	rotate := func(ctx context.Context) {
		deadline := m.nextRotationDeadline(logger)
		if sleepInterval := deadline.Sub(m.now()); sleepInterval > 0 {
			logger.V(2).Info("Waiting for next certificate rotation", "sleep", sleepInterval)

			timer := time.NewTimer(sleepInterval)
			defer timer.Stop()

			select {
			case <-timer.C:
				// unblock when deadline expires
			case <-templateChanged:
				_, lastRequestTemplate := m.getLastRequest()
				if reflect.DeepEqual(lastRequestTemplate, m.getTemplate()) {
					// if the template now matches what we last requested, restart the rotation deadline loop
					return
				}
				logger.V(2).Info("Certificate template changed, rotating")
			}
		}

		// Don't enter rotateCerts and trigger backoff if we don't even have a template to request yet
		if m.getTemplate() == nil {
			return
		}

		backoff := wait.Backoff{
			Duration: 2 * time.Second,
			Factor:   2,
			Jitter:   0.1,
			Steps:    5,
		}
		if err := wait.ExponentialBackoffWithContext(ctx, backoff, m.rotateCerts); err != nil {
			utilruntime.HandleErrorWithContext(ctx, err, "Reached backoff limit, still unable to rotate certs")
			// After the backoff is exhausted keep retrying at a fixed interval;
			// the result of this call is not inspected.
			wait.PollInfiniteWithContext(ctx, 32*time.Second, m.rotateCerts)
		}
	}

	wg.Add(1)
	go func() {
		defer wg.Done()
		wait.UntilWithContext(m.ctx, rotate, time.Second)
	}()

	// The template watcher is only started when a GetTemplate function was configured.
	if m.dynamicTemplate {
		template := func(ctx context.Context) {
			// check if the current template matches what we last requested
			lastRequestCancel, lastRequestTemplate := m.getLastRequest()

			if !m.certSatisfiesTemplate(logger) && !reflect.DeepEqual(lastRequestTemplate, m.getTemplate()) {
				// if the template is different, queue up an interrupt of the rotation deadline loop.
				// if we've requested a CSR that matches the new template by the time the interrupt is handled, the interrupt is disregarded.
				if lastRequestCancel != nil {
					// if we're currently waiting on a submitted request that no longer matches what we want, stop waiting
					lastRequestCancel()
				}
				select {
				case templateChanged <- struct{}{}:
				case <-ctx.Done():
				}
			}
		}
		wg.Add(1)
		go func() {
			defer wg.Done()
			wait.UntilWithContext(m.ctx, template, time.Second)
		}()
	}
}

// getCurrentCertificateOrBootstrap picks the certificate a new manager starts
// with and reports whether it should be rotated right away.
//
//   - If the store holds a certificate whose Leaf is set and not yet expired,
//     it is returned with shouldRotate=false.
//   - A store error other than *NoCertKeyError is returned as is.
//   - Otherwise the bootstrap pair is used: when either bootstrap PEM is nil,
//     (nil, true, nil) is returned; else the pair is parsed, its Leaf is
//     populated and it is returned with shouldRotate=true. A failure to write
//     the bootstrap pair to the store is only reported, not returned.
func getCurrentCertificateOrBootstrap(
	logger klog.Logger,
	store Store,
	bootstrapCertificatePEM []byte,
	bootstrapKeyPEM []byte) (cert *tls.Certificate, shouldRotate bool, errResult error) {

	currentCert, err := store.Current()
	if err == nil {
		// if the current cert is expired, fall back to the bootstrap cert
		if currentCert.Leaf != nil && time.Now().Before(currentCert.Leaf.NotAfter) {
			return currentCert, false, nil
		}
	} else {
		if _, ok := err.(*NoCertKeyError); !ok {
			return nil, false, err
		}
	}

	if bootstrapCertificatePEM == nil || bootstrapKeyPEM == nil {
		return nil, true, nil
	}

	bootstrapCert, err := tls.X509KeyPair(bootstrapCertificatePEM, bootstrapKeyPEM)
	if err != nil {
		return nil, false, err
	}
	if len(bootstrapCert.Certificate) < 1 {
		return nil, false, fmt.Errorf("no cert/key data found")
	}

	certs, err := x509.ParseCertificates(bootstrapCert.Certificate[0])
	if err != nil {
		return nil, false, fmt.Errorf("unable to parse certificate data: %w", err)
	}
	if len(certs) < 1 {
		return nil, false, fmt.Errorf("no cert data found")
	}
	bootstrapCert.Leaf = certs[0]

	if _, err := store.Update(bootstrapCertificatePEM, bootstrapKeyPEM); err != nil {
		utilruntime.HandleErrorWithLogger(logger, err, "Unable to set the cert/key pair to the bootstrap certificate")
	}

	return &bootstrapCert, true, nil
}

// getClientset builds a clientset through clientsetFn, passing it the current
// certificate. The current certificate is read before clientAccessLock is
// taken; clientsetFn itself is invoked under that lock.
func (m *manager) getClientset() (clientset.Interface, error) {
	current := m.Current()
	m.clientAccessLock.Lock()
	defer m.clientAccessLock.Unlock()
	return m.clientsetFn(current)
}

// RotateCerts is exposed for testing only and is not a part of the public interface.
// Returns true if it changed the cert, false otherwise. Error is only returned in
// exceptional cases.
func (m *manager) RotateCerts() (bool, error) {
	return m.rotateCerts(m.ctx)
}

// rotateCerts attempts to request a client cert from the server, wait a reasonable
// period of time for it to be signed, and then update the cert on disk. If it cannot
// retrieve a cert, it will return false. It will only return error in exceptional cases.
// This method also keeps track of "server health" by interpreting the responses it gets
// from the server on the various calls it makes.
// TODO: return errors, have callers handle and log them correctly
func (m *manager) rotateCerts(ctx context.Context) (bool, error) {
	logger := klog.FromContext(ctx)
	logger.V(2).Info("Rotating certificates")

	template, csrPEM, keyPEM, privateKey, err := m.generateCSR()
	if err != nil {
		utilruntime.HandleErrorWithContext(ctx, err, "Unable to generate a certificate signing request")
		if m.certificateRenewFailure != nil {
			m.certificateRenewFailure.Inc()
		}
		return false, nil
	}

	// request the client each time
	clientSet, err := m.getClientset()
	if err != nil {
		utilruntime.HandleErrorWithContext(ctx, err, "Unable to load a client to request certificates")
		if m.certificateRenewFailure != nil {
			m.certificateRenewFailure.Inc()
		}
		return false, nil
	}

	getUsages := m.getUsages
	if m.getUsages == nil {
		getUsages = DefaultKubeletClientGetUsages
	}
	usages := getUsages(privateKey)
	// Call the Certificate Signing Request API to get a certificate for the
	// new private key
	reqName, reqUID, err := csr.RequestCertificateWithContext(ctx, clientSet, csrPEM, "", m.signerName, m.requestedCertificateLifetime, usages, privateKey)
	if err != nil {
		utilruntime.HandleErrorWithContext(ctx, err, "Failed while requesting a signed certificate from the control plane")
		if m.certificateRenewFailure != nil {
			m.certificateRenewFailure.Inc()
		}
		return false, m.updateServerError(err)
	}

	// Bound the wait for approval; the cancel function is also published via
	// setLastRequest so the wait can be interrupted from elsewhere.
	ctx, cancel := context.WithTimeout(ctx, certificateWaitTimeout)
	defer cancel()

	// Once we've successfully submitted a CSR for this template, record that we did so
	m.setLastRequest(cancel, template)

	// Wait for the certificate to be signed. This interface and internal timeout
	// is a remainder after the old design using raw watch wrapped with backoff.
	crtPEM, err := csr.WaitForCertificate(ctx, clientSet, reqName, reqUID)
	if err != nil {
		utilruntime.HandleErrorWithContext(ctx, err, "Certificate request was not signed")
		if m.certificateRenewFailure != nil {
			m.certificateRenewFailure.Inc()
		}
		return false, nil
	}

	cert, err := m.certStore.Update(crtPEM, keyPEM)
	if err != nil {
		utilruntime.HandleErrorWithContext(ctx, err, "Unable to store the new cert/key pair")
		if m.certificateRenewFailure != nil {
			m.certificateRenewFailure.Inc()
		}
		return false, nil
	}

	// Record how long the replaced certificate had been valid, measured from its NotBefore.
	// NOTE(review): old.Leaf is dereferenced without a nil check — confirm it is always populated here.
	if old := m.updateCached(cert); old != nil && m.certificateRotation != nil {
		m.certificateRotation.Observe(m.now().Sub(old.Leaf.NotBefore).Seconds())
	}

	return true, nil
}

// Check that the current certificate on disk satisfies the requests from the
// current template.
//
// Note that extra items in the certificate's SAN or orgs that don't exist in
// the template will not trigger a renewal.
//
// Requires certAccessLock to be locked.
+func (m *manager) certSatisfiesTemplateLocked(logger klog.Logger) bool { + if m.cert == nil { + return false + } + + if template := m.getTemplate(); template != nil { + if template.Subject.CommonName != m.cert.Leaf.Subject.CommonName { + logger.V(2).Info("Current certificate CN does not match requested CN", "currentName", m.cert.Leaf.Subject.CommonName, "requestedName", template.Subject.CommonName) + return false + } + + currentDNSNames := sets.NewString(m.cert.Leaf.DNSNames...) + desiredDNSNames := sets.NewString(template.DNSNames...) + missingDNSNames := desiredDNSNames.Difference(currentDNSNames) + if len(missingDNSNames) > 0 { + logger.V(2).Info("Current certificate is missing requested DNS names", "dnsNames", missingDNSNames.List()) + return false + } + + currentIPs := sets.NewString() + for _, ip := range m.cert.Leaf.IPAddresses { + currentIPs.Insert(ip.String()) + } + desiredIPs := sets.NewString() + for _, ip := range template.IPAddresses { + desiredIPs.Insert(ip.String()) + } + missingIPs := desiredIPs.Difference(currentIPs) + if len(missingIPs) > 0 { + logger.V(2).Info("Current certificate is missing requested IP addresses", "IPs", missingIPs.List()) + return false + } + + currentOrgs := sets.NewString(m.cert.Leaf.Subject.Organization...) + desiredOrgs := sets.NewString(template.Subject.Organization...) + missingOrgs := desiredOrgs.Difference(currentOrgs) + if len(missingOrgs) > 0 { + logger.V(2).Info("Current certificate is missing requested orgs", "orgs", missingOrgs.List()) + return false + } + } + + return true +} + +func (m *manager) certSatisfiesTemplate(logger klog.Logger) bool { + m.certAccessLock.RLock() + defer m.certAccessLock.RUnlock() + return m.certSatisfiesTemplateLocked(logger) +} + +// nextRotationDeadline returns a value for the threshold at which the +// current certificate should be rotated, 80%+/-10% of the expiration of the +// certificate. 
+func (m *manager) nextRotationDeadline(logger klog.Logger) time.Time { + // forceRotation is not protected by locks + if m.forceRotation { + m.forceRotation = false + return m.now() + } + + m.certAccessLock.RLock() + defer m.certAccessLock.RUnlock() + + if !m.certSatisfiesTemplateLocked(logger) { + return m.now() + } + + notAfter := m.cert.Leaf.NotAfter + totalDuration := float64(notAfter.Sub(m.cert.Leaf.NotBefore)) + deadline := m.cert.Leaf.NotBefore.Add(jitteryDuration(totalDuration)) + + logger.V(2).Info("Certificate rotation deadline determined", "expiration", notAfter, "deadline", deadline) + return deadline +} + +// jitteryDuration uses some jitter to set the rotation threshold so each node +// will rotate at approximately 70-90% of the total lifetime of the +// certificate. With jitter, if a number of nodes are added to a cluster at +// approximately the same time (such as cluster creation time), they won't all +// try to rotate certificates at the same time for the rest of the life of the +// cluster. +// +// This function is represented as a variable to allow replacement during testing. +var jitteryDuration = func(totalDuration float64) time.Duration { + return wait.Jitter(time.Duration(totalDuration), 0.2) - time.Duration(totalDuration*0.3) +} + +// updateCached sets the most recent retrieved cert and returns the old cert. +// It also sets the server as assumed healthy. +func (m *manager) updateCached(cert *tls.Certificate) *tls.Certificate { + m.certAccessLock.Lock() + defer m.certAccessLock.Unlock() + m.serverHealth = true + old := m.cert + m.cert = cert + return old +} + +// updateServerError takes an error returned by the server and infers +// the health of the server based on the error. It will return nil if +// the error does not require immediate termination of any wait loops, +// and otherwise it will return the error. 
+func (m *manager) updateServerError(err error) error { + m.certAccessLock.Lock() + defer m.certAccessLock.Unlock() + switch { + case apierrors.IsUnauthorized(err): + // SSL terminating proxies may report this error instead of the master + m.serverHealth = true + case apierrors.IsUnexpectedServerError(err): + // generally indicates a proxy or other load balancer problem, rather than a problem coming + // from the master + m.serverHealth = false + default: + // Identify known errors that could be expected for a cert request that + // indicate everything is working normally + m.serverHealth = apierrors.IsNotFound(err) || apierrors.IsForbidden(err) + } + return nil +} + +func (m *manager) generateCSR() (template *x509.CertificateRequest, csrPEM []byte, keyPEM []byte, key interface{}, err error) { + // Generate a new private key. + privateKey, err := ecdsa.GenerateKey(elliptic.P256(), cryptorand.Reader) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("unable to generate a new private key: %w", err) + } + der, err := x509.MarshalECPrivateKey(privateKey) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("unable to marshal the new key to DER: %w", err) + } + + keyPEM = pem.EncodeToMemory(&pem.Block{Type: keyutil.ECPrivateKeyBlockType, Bytes: der}) + + template = m.getTemplate() + if template == nil { + return nil, nil, nil, nil, errors.New("unable to create a csr, no template available") + } + csrPEM, err = cert.MakeCSRFromTemplate(privateKey, template) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("unable to create a csr from the private key: %w", err) + } + return template, csrPEM, keyPEM, privateKey, nil +} + +func (m *manager) getLastRequest() (context.CancelFunc, *x509.CertificateRequest) { + m.lastRequestLock.Lock() + defer m.lastRequestLock.Unlock() + return m.lastRequestCancel, m.lastRequest +} + +func (m *manager) setLastRequest(cancel context.CancelFunc, r *x509.CertificateRequest) { + m.lastRequestLock.Lock() + defer 
m.lastRequestLock.Unlock() + m.lastRequestCancel = cancel + m.lastRequest = r +} + +func hasKeyUsage(usages []certificates.KeyUsage, usage certificates.KeyUsage) bool { + for _, u := range usages { + if u == usage { + return true + } + } + return false +} diff --git a/vendor/k8s.io/client-go/util/certificate/certificate_store.go b/vendor/k8s.io/client-go/util/certificate/certificate_store.go new file mode 100644 index 0000000000..4f9d5db06a --- /dev/null +++ b/vendor/k8s.io/client-go/util/certificate/certificate_store.go @@ -0,0 +1,335 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package certificate + +import ( + "crypto/tls" + "crypto/x509" + "encoding/pem" + "fmt" + "os" + "path/filepath" + "time" + + certutil "k8s.io/client-go/util/cert" + "k8s.io/klog/v2" +) + +const ( + keyExtension = ".key" + certExtension = ".crt" + pemExtension = ".pem" + currentPair = "current" + updatedPair = "updated" +) + +type fileStore struct { + logger klog.Logger + pairNamePrefix string + certDirectory string + keyDirectory string + certFile string + keyFile string +} + +// FileStore is a store that provides certificate retrieval as well as +// the path on disk of the current PEM. +type FileStore interface { + Store + // CurrentPath returns the path on disk of the current certificate/key + // pair encoded as PEM files. 
+ CurrentPath() string +} + +// NewFileStore returns a concrete implementation of a Store that is based on +// storing the cert/key pairs in a single file per pair on disk in the +// designated directory. When starting up it will look for the currently +// selected cert/key pair in: +// +// 1. ${certDirectory}/${pairNamePrefix}-current.pem - both cert and key are in the same file. +// 2. ${certFile}, ${keyFile} +// 3. ${certDirectory}/${pairNamePrefix}.crt, ${keyDirectory}/${pairNamePrefix}.key +// +// The first one found will be used. If rotation is enabled, future cert/key +// updates will be written to the ${certDirectory} directory and +// ${certDirectory}/${pairNamePrefix}-current.pem will be created as a soft +// link to the currently selected cert/key pair. +// +// Contextual logging: NewFileStoreWithLogger should be used instead of NewFileStore in code which supports contextual logging. +func NewFileStore( + pairNamePrefix string, + certDirectory string, + keyDirectory string, + certFile string, + keyFile string) (FileStore, error) { + return NewFileStoreWithLogger(klog.Background(), pairNamePrefix, certDirectory, keyDirectory, certFile, keyFile) +} + +// NewFileStoreWithLogger is a variant of NewFileStore where the caller is in +// control of logging. All log messages get emitted with logger.Info, so +// pass e.g. logger.V(3) to make logging less verbose. +func NewFileStoreWithLogger( + logger klog.Logger, + pairNamePrefix string, + certDirectory string, + keyDirectory string, + certFile string, + keyFile string) (FileStore, error) { + + s := fileStore{ + logger: logger, + pairNamePrefix: pairNamePrefix, + certDirectory: certDirectory, + keyDirectory: keyDirectory, + certFile: certFile, + keyFile: keyFile, + } + if err := s.recover(); err != nil { + return nil, err + } + return &s, nil +} + +// CurrentPath returns the path to the current version of these certificates. 
+func (s *fileStore) CurrentPath() string { + return filepath.Join(s.certDirectory, s.filename(currentPair)) +} + +// recover checks if there is a certificate rotation that was interrupted while +// progress, and if so, attempts to recover to a good state. +func (s *fileStore) recover() error { + // If the 'current' file doesn't exist, continue on with the recovery process. + currentPath := filepath.Join(s.certDirectory, s.filename(currentPair)) + if exists, err := fileExists(currentPath); err != nil { + return err + } else if exists { + return nil + } + + // If the 'updated' file exists, and it is a symbolic link, continue on + // with the recovery process. + updatedPath := filepath.Join(s.certDirectory, s.filename(updatedPair)) + if fi, err := os.Lstat(updatedPath); err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } else if fi.Mode()&os.ModeSymlink != os.ModeSymlink { + return fmt.Errorf("expected %q to be a symlink but it is a file", updatedPath) + } + + // Move the 'updated' symlink to 'current'. 
+ if err := os.Rename(updatedPath, currentPath); err != nil { + return fmt.Errorf("unable to rename %q to %q: %v", updatedPath, currentPath, err) + } + return nil +} + +func (s *fileStore) Current() (*tls.Certificate, error) { + pairFile := filepath.Join(s.certDirectory, s.filename(currentPair)) + if pairFileExists, err := fileExists(pairFile); err != nil { + return nil, err + } else if pairFileExists { + s.logger.Info("Loading cert/key pair from a file", "filePath", pairFile) + return loadFile(pairFile) + } + + certFileExists, err := fileExists(s.certFile) + if err != nil { + return nil, err + } + keyFileExists, err := fileExists(s.keyFile) + if err != nil { + return nil, err + } + if certFileExists && keyFileExists { + s.logger.Info("Loading cert/key pair", "certFile", s.certFile, "keyFile", s.keyFile) + return loadX509KeyPair(s.certFile, s.keyFile) + } + + c := filepath.Join(s.certDirectory, s.pairNamePrefix+certExtension) + k := filepath.Join(s.keyDirectory, s.pairNamePrefix+keyExtension) + certFileExists, err = fileExists(c) + if err != nil { + return nil, err + } + keyFileExists, err = fileExists(k) + if err != nil { + return nil, err + } + if certFileExists && keyFileExists { + s.logger.Info("Loading cert/key pair", "certFile", c, "keyFile", k) + return loadX509KeyPair(c, k) + } + + noKeyErr := NoCertKeyError( + fmt.Sprintf("no cert/key files read at %q, (%q, %q) or (%q, %q)", + pairFile, + s.certFile, + s.keyFile, + s.certDirectory, + s.keyDirectory)) + return nil, &noKeyErr +} + +func loadFile(pairFile string) (*tls.Certificate, error) { + // LoadX509KeyPair knows how to parse combined cert and private key from + // the same file. 
+ cert, err := tls.LoadX509KeyPair(pairFile, pairFile) + if err != nil { + return nil, fmt.Errorf("could not convert data from %q into cert/key pair: %v", pairFile, err) + } + certs, err := x509.ParseCertificates(cert.Certificate[0]) + if err != nil { + return nil, fmt.Errorf("unable to parse certificate data: %v", err) + } + cert.Leaf = certs[0] + return &cert, nil +} + +func (s *fileStore) Update(certData, keyData []byte) (*tls.Certificate, error) { + ts := time.Now().Format("2006-01-02-15-04-05") + pemFilename := s.filename(ts) + + if err := os.MkdirAll(s.certDirectory, 0755); err != nil { + return nil, fmt.Errorf("could not create directory %q to store certificates: %v", s.certDirectory, err) + } + certPath := filepath.Join(s.certDirectory, pemFilename) + + f, err := os.OpenFile(certPath, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0600) + if err != nil { + return nil, fmt.Errorf("could not open %q: %v", certPath, err) + } + defer f.Close() + + // First cert is leaf, remainder are intermediates + certs, err := certutil.ParseCertsPEM(certData) + if err != nil { + return nil, fmt.Errorf("invalid certificate data: %v", err) + } + for _, c := range certs { + pem.Encode(f, &pem.Block{Type: "CERTIFICATE", Bytes: c.Raw}) + } + + keyBlock, _ := pem.Decode(keyData) + if keyBlock == nil { + return nil, fmt.Errorf("invalid key data") + } + pem.Encode(f, keyBlock) + + cert, err := loadFile(certPath) + if err != nil { + return nil, err + } + + if err := s.updateSymlink(certPath); err != nil { + return nil, err + } + return cert, nil +} + +// updateSymLink updates the current symlink to point to the file that is +// passed it. It will fail if there is a non-symlink file exists where the +// symlink is expected to be. +func (s *fileStore) updateSymlink(filename string) error { + // If the 'current' file either doesn't exist, or is already a symlink, + // proceed. Otherwise, this is an unrecoverable error. 
+ currentPath := filepath.Join(s.certDirectory, s.filename(currentPair)) + currentPathExists := false + if fi, err := os.Lstat(currentPath); err != nil { + if !os.IsNotExist(err) { + return err + } + } else if fi.Mode()&os.ModeSymlink != os.ModeSymlink { + return fmt.Errorf("expected %q to be a symlink but it is a file", currentPath) + } else { + currentPathExists = true + } + + // If the 'updated' file doesn't exist, proceed. If it exists but it is a + // symlink, delete it. Otherwise, this is an unrecoverable error. + updatedPath := filepath.Join(s.certDirectory, s.filename(updatedPair)) + if fi, err := os.Lstat(updatedPath); err != nil { + if !os.IsNotExist(err) { + return err + } + } else if fi.Mode()&os.ModeSymlink != os.ModeSymlink { + return fmt.Errorf("expected %q to be a symlink but it is a file", updatedPath) + } else { + if err := os.Remove(updatedPath); err != nil { + return fmt.Errorf("unable to remove %q: %v", updatedPath, err) + } + } + + // Check that the new cert/key pair file exists to avoid rotating to an + // invalid cert/key. + if filenameExists, err := fileExists(filename); err != nil { + return err + } else if !filenameExists { + return fmt.Errorf("file %q does not exist so it can not be used as the currently selected cert/key", filename) + } + + // Ensure the source path is absolute to ensure the symlink target is + // correct when certDirectory is a relative path. + filename, err := filepath.Abs(filename) + if err != nil { + return err + } + + // Create the 'updated' symlink pointing to the requested file name. + if err := os.Symlink(filename, updatedPath); err != nil { + return fmt.Errorf("unable to create a symlink from %q to %q: %v", updatedPath, filename, err) + } + + // Replace the 'current' symlink. 
+ if currentPathExists { + if err := os.Remove(currentPath); err != nil { + return fmt.Errorf("unable to remove %q: %v", currentPath, err) + } + } + if err := os.Rename(updatedPath, currentPath); err != nil { + return fmt.Errorf("unable to rename %q to %q: %v", updatedPath, currentPath, err) + } + return nil +} + +func (s *fileStore) filename(qualifier string) string { + return s.pairNamePrefix + "-" + qualifier + pemExtension +} + +func loadX509KeyPair(certFile, keyFile string) (*tls.Certificate, error) { + cert, err := tls.LoadX509KeyPair(certFile, keyFile) + if err != nil { + return nil, err + } + certs, err := x509.ParseCertificates(cert.Certificate[0]) + if err != nil { + return nil, fmt.Errorf("unable to parse certificate data: %v", err) + } + cert.Leaf = certs[0] + return &cert, nil +} + +// FileExists checks if specified file exists. +func fileExists(filename string) (bool, error) { + if _, err := os.Stat(filename); os.IsNotExist(err) { + return false, nil + } else if err != nil { + return false, err + } + return true, nil +} diff --git a/vendor/k8s.io/client-go/util/certificate/csr/csr.go b/vendor/k8s.io/client-go/util/certificate/csr/csr.go new file mode 100644 index 0000000000..a2921ecd4f --- /dev/null +++ b/vendor/k8s.io/client-go/util/certificate/csr/csr.go @@ -0,0 +1,377 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package csr + +import ( + "context" + "crypto" + "crypto/x509" + "encoding/pem" + "fmt" + "reflect" + "time" + + certificatesv1 "k8s.io/api/certificates/v1" + certificatesv1beta1 "k8s.io/api/certificates/v1beta1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apimachinery/pkg/watch" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/cache" + watchtools "k8s.io/client-go/tools/watch" + certutil "k8s.io/client-go/util/cert" + "k8s.io/klog/v2" + "k8s.io/utils/pointer" +) + +// RequestCertificate will either use an existing (if this process has run +// before but not to completion) or create a certificate signing request using the +// PEM encoded CSR and send it to API server. An optional requestedDuration may be passed +// to set the spec.expirationSeconds field on the CSR to control the lifetime of the issued +// certificate. This is not guaranteed as the signer may choose to ignore the request. +// +// Deprecated: use RequestCertificateWithContext instead. +func RequestCertificate(client clientset.Interface, csrData []byte, name, signerName string, requestedDuration *time.Duration, usages []certificatesv1.KeyUsage, privateKey interface{}) (reqName string, reqUID types.UID, err error) { + return RequestCertificateWithContext(context.Background(), client, csrData, name, signerName, requestedDuration, usages, privateKey) +} + +// RequestCertificateWithContext will either use an existing (if this process has run +// before but not to completion) or create a certificate signing request using the +// PEM encoded CSR and send it to API server. An optional requestedDuration may be passed +// to set the spec.expirationSeconds field on the CSR to control the lifetime of the issued +// certificate. 
This is not guaranteed as the signer may choose to ignore the request. +func RequestCertificateWithContext(ctx context.Context, client clientset.Interface, csrData []byte, name, signerName string, requestedDuration *time.Duration, usages []certificatesv1.KeyUsage, privateKey interface{}) (reqName string, reqUID types.UID, err error) { + csr := &certificatesv1.CertificateSigningRequest{ + // Username, UID, Groups will be injected by API server. + TypeMeta: metav1.TypeMeta{Kind: "CertificateSigningRequest"}, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: certificatesv1.CertificateSigningRequestSpec{ + Request: csrData, + Usages: usages, + SignerName: signerName, + }, + } + if len(csr.Name) == 0 { + csr.GenerateName = "csr-" + } + if requestedDuration != nil { + csr.Spec.ExpirationSeconds = DurationToExpirationSeconds(*requestedDuration) + } + + reqName, reqUID, err = create(ctx, client, csr) + switch { + case err == nil: + return reqName, reqUID, err + + case apierrors.IsAlreadyExists(err) && len(name) > 0: + logger := klog.FromContext(ctx) + logger.Info("csr for this node already exists, reusing") + req, err := get(ctx, client, name) + if err != nil { + return "", "", formatError("cannot retrieve certificate signing request: %v", err) + } + if err := ensureCompatible(req, csr, privateKey); err != nil { + return "", "", fmt.Errorf("retrieved csr is not compatible: %v", err) + } + logger.Info("csr for this node is still valid") + return req.Name, req.UID, nil + + default: + return "", "", formatError("cannot create certificate signing request: %v", err) + } +} + +func DurationToExpirationSeconds(duration time.Duration) *int32 { + return pointer.Int32(int32(duration / time.Second)) +} + +func ExpirationSecondsToDuration(expirationSeconds int32) time.Duration { + return time.Duration(expirationSeconds) * time.Second +} + +func get(ctx context.Context, client clientset.Interface, name string) (*certificatesv1.CertificateSigningRequest, error) { + v1req, 
v1err := client.CertificatesV1().CertificateSigningRequests().Get(ctx, name, metav1.GetOptions{}) + if v1err == nil || !apierrors.IsNotFound(v1err) { + return v1req, v1err + } + + v1beta1req, v1beta1err := client.CertificatesV1beta1().CertificateSigningRequests().Get(ctx, name, metav1.GetOptions{}) + if v1beta1err != nil { + return nil, v1beta1err + } + + v1req = &certificatesv1.CertificateSigningRequest{ + ObjectMeta: v1beta1req.ObjectMeta, + Spec: certificatesv1.CertificateSigningRequestSpec{ + Request: v1beta1req.Spec.Request, + }, + } + if v1beta1req.Spec.SignerName != nil { + v1req.Spec.SignerName = *v1beta1req.Spec.SignerName + } + for _, usage := range v1beta1req.Spec.Usages { + v1req.Spec.Usages = append(v1req.Spec.Usages, certificatesv1.KeyUsage(usage)) + } + return v1req, nil +} + +func create(ctx context.Context, client clientset.Interface, csr *certificatesv1.CertificateSigningRequest) (reqName string, reqUID types.UID, err error) { + // only attempt a create via v1 if we specified signerName and usages and are not using the legacy unknown signerName + if len(csr.Spec.Usages) > 0 && len(csr.Spec.SignerName) > 0 && csr.Spec.SignerName != "kubernetes.io/legacy-unknown" { + v1req, v1err := client.CertificatesV1().CertificateSigningRequests().Create(ctx, csr, metav1.CreateOptions{}) + switch { + case v1err != nil && apierrors.IsNotFound(v1err): + // v1 CSR API was not found, continue to try v1beta1 + + case v1err != nil: + // other creation error + return "", "", v1err + + default: + // success + return v1req.Name, v1req.UID, v1err + } + } + + // convert relevant bits to v1beta1 + v1beta1csr := &certificatesv1beta1.CertificateSigningRequest{ + ObjectMeta: csr.ObjectMeta, + Spec: certificatesv1beta1.CertificateSigningRequestSpec{ + SignerName: &csr.Spec.SignerName, + Request: csr.Spec.Request, + }, + } + for _, usage := range csr.Spec.Usages { + v1beta1csr.Spec.Usages = append(v1beta1csr.Spec.Usages, certificatesv1beta1.KeyUsage(usage)) + } + + // create 
v1beta1 + v1beta1req, v1beta1err := client.CertificatesV1beta1().CertificateSigningRequests().Create(ctx, v1beta1csr, metav1.CreateOptions{}) + if v1beta1err != nil { + return "", "", v1beta1err + } + return v1beta1req.Name, v1beta1req.UID, nil +} + +// WaitForCertificate waits for a certificate to be issued until timeout, or returns an error. +func WaitForCertificate(ctx context.Context, client clientset.Interface, reqName string, reqUID types.UID) (certData []byte, err error) { + fieldSelector := fields.OneTermEqualSelector("metadata.name", reqName).String() + logger := klog.FromContext(ctx) + + var lw *cache.ListWatch + var obj runtime.Object + for { + // see if the v1 API is available + if _, err := client.CertificatesV1().CertificateSigningRequests().List(ctx, metav1.ListOptions{FieldSelector: fieldSelector}); err == nil { + // watch v1 objects + obj = &certificatesv1.CertificateSigningRequest{} + lw = &cache.ListWatch{ + ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { + options.FieldSelector = fieldSelector + return client.CertificatesV1().CertificateSigningRequests().List(ctx, options) + }, + WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { + options.FieldSelector = fieldSelector + return client.CertificatesV1().CertificateSigningRequests().Watch(ctx, options) + }, + } + break + } else { + logger.V(2).Info("Error fetching v1 certificate signing request", "err", err) + } + + // return if we've timed out + if err := ctx.Err(); err != nil { + return nil, wait.ErrWaitTimeout + } + + // see if the v1beta1 API is available + if _, err := client.CertificatesV1beta1().CertificateSigningRequests().List(ctx, metav1.ListOptions{FieldSelector: fieldSelector}); err == nil { + // watch v1beta1 objects + obj = &certificatesv1beta1.CertificateSigningRequest{} + lw = &cache.ListWatch{ + ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { + options.FieldSelector = fieldSelector + return 
client.CertificatesV1beta1().CertificateSigningRequests().List(ctx, options) + }, + WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { + options.FieldSelector = fieldSelector + return client.CertificatesV1beta1().CertificateSigningRequests().Watch(ctx, options) + }, + } + break + } else { + logger.V(2).Info("Error fetching v1beta1 certificate signing request", "err", err) + } + + // return if we've timed out + if err := ctx.Err(); err != nil { + return nil, wait.ErrWaitTimeout + } + + // wait and try again + time.Sleep(time.Second) + } + + var issuedCertificate []byte + _, err = watchtools.UntilWithSync( + ctx, + lw, + obj, + nil, + func(event watch.Event) (bool, error) { + switch event.Type { + case watch.Modified, watch.Added: + case watch.Deleted: + return false, fmt.Errorf("csr %q was deleted", reqName) + default: + return false, nil + } + + switch csr := event.Object.(type) { + case *certificatesv1.CertificateSigningRequest: + if csr.UID != reqUID { + return false, fmt.Errorf("csr %q changed UIDs", csr.Name) + } + approved := false + for _, c := range csr.Status.Conditions { + if c.Type == certificatesv1.CertificateDenied { + return false, fmt.Errorf("certificate signing request is denied, reason: %v, message: %v", c.Reason, c.Message) + } + if c.Type == certificatesv1.CertificateFailed { + return false, fmt.Errorf("certificate signing request failed, reason: %v, message: %v", c.Reason, c.Message) + } + if c.Type == certificatesv1.CertificateApproved { + approved = true + } + } + if approved { + if len(csr.Status.Certificate) > 0 { + logger.V(2).Info("Certificate signing request is issued", "csr", klog.KObj(csr)) + issuedCertificate = csr.Status.Certificate + return true, nil + } + logger.V(2).Info("Certificate signing request is approved, waiting to be issued", "csr", klog.KObj(csr)) + } + + case *certificatesv1beta1.CertificateSigningRequest: + if csr.UID != reqUID { + return false, fmt.Errorf("csr %q changed UIDs", csr.Name) + } + 
approved := false + for _, c := range csr.Status.Conditions { + if c.Type == certificatesv1beta1.CertificateDenied { + return false, fmt.Errorf("certificate signing request is denied, reason: %v, message: %v", c.Reason, c.Message) + } + if c.Type == certificatesv1beta1.CertificateFailed { + return false, fmt.Errorf("certificate signing request failed, reason: %v, message: %v", c.Reason, c.Message) + } + if c.Type == certificatesv1beta1.CertificateApproved { + approved = true + } + } + if approved { + if len(csr.Status.Certificate) > 0 { + logger.V(2).Info("Certificate signing request is issued", "csr", klog.KObj(csr)) + issuedCertificate = csr.Status.Certificate + return true, nil + } + logger.V(2).Info("Certificate signing request is approved, waiting to be issued", "csr", klog.KObj(csr)) + } + + default: + return false, fmt.Errorf("unexpected type received: %T", event.Object) + } + + return false, nil + }, + ) + if err == wait.ErrWaitTimeout { + return nil, wait.ErrWaitTimeout + } + if err != nil { + return nil, formatError("cannot watch on the certificate signing request: %v", err) + } + + return issuedCertificate, nil +} + +// ensureCompatible ensures that a CSR object is compatible with an original CSR +func ensureCompatible(new, orig *certificatesv1.CertificateSigningRequest, privateKey interface{}) error { + newCSR, err := parseCSR(new.Spec.Request) + if err != nil { + return fmt.Errorf("unable to parse new csr: %v", err) + } + origCSR, err := parseCSR(orig.Spec.Request) + if err != nil { + return fmt.Errorf("unable to parse original csr: %v", err) + } + if !reflect.DeepEqual(newCSR.Subject, origCSR.Subject) { + return fmt.Errorf("csr subjects differ: new: %#v, orig: %#v", newCSR.Subject, origCSR.Subject) + } + if len(new.Spec.SignerName) > 0 && len(orig.Spec.SignerName) > 0 && new.Spec.SignerName != orig.Spec.SignerName { + return fmt.Errorf("csr signerNames differ: new %q, orig: %q", new.Spec.SignerName, orig.Spec.SignerName) + } + signer, ok := 
privateKey.(crypto.Signer) + if !ok { + return fmt.Errorf("privateKey is not a signer") + } + newCSR.PublicKey = signer.Public() + if err := newCSR.CheckSignature(); err != nil { + return fmt.Errorf("error validating signature new CSR against old key: %v", err) + } + if len(new.Status.Certificate) > 0 { + certs, err := certutil.ParseCertsPEM(new.Status.Certificate) + if err != nil { + return fmt.Errorf("error parsing signed certificate for CSR: %v", err) + } + now := time.Now() + for _, cert := range certs { + if now.After(cert.NotAfter) { + return fmt.Errorf("one of the certificates for the CSR has expired: %s", cert.NotAfter) + } + } + } + return nil +} + +// formatError preserves the type of an API message but alters the message. Expects +// a single argument format string, and returns the wrapped error. +func formatError(format string, err error) error { + if s, ok := err.(apierrors.APIStatus); ok { + se := &apierrors.StatusError{ErrStatus: s.Status()} + se.ErrStatus.Message = fmt.Sprintf(format, se.ErrStatus.Message) + return se + } + return fmt.Errorf(format, err) +} + +// parseCSR extracts the CSR from the API object and decodes it. 
+func parseCSR(pemData []byte) (*x509.CertificateRequest, error) { + // extract PEM from request object + block, _ := pem.Decode(pemData) + if block == nil || block.Type != "CERTIFICATE REQUEST" { + return nil, fmt.Errorf("PEM block type must be CERTIFICATE REQUEST") + } + return x509.ParseCertificateRequest(block.Bytes) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 80278e76df..c34014aa85 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1224,11 +1224,14 @@ k8s.io/client-go/tools/record k8s.io/client-go/tools/record/util k8s.io/client-go/tools/reference k8s.io/client-go/tools/remotecommand +k8s.io/client-go/tools/watch k8s.io/client-go/transport k8s.io/client-go/transport/spdy k8s.io/client-go/transport/websocket k8s.io/client-go/util/apply k8s.io/client-go/util/cert +k8s.io/client-go/util/certificate +k8s.io/client-go/util/certificate/csr k8s.io/client-go/util/connrotation k8s.io/client-go/util/consistencydetector k8s.io/client-go/util/exec