Skip to content

Commit a3aadb4

Browse files
authored
Merge pull request #836 from beraldoleal/kata-3967
Enable Confidential Containers on Baremetal
2 parents d31099e + ec99b1d commit a3aadb4

File tree

9 files changed

+370
-32
lines changed

9 files changed

+370
-32
lines changed

bundle/manifests/sandboxed-containers-operator.clusterserviceversion.yaml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ metadata:
1313
}
1414
]
1515
capabilities: Seamless Upgrades
16-
createdAt: "2025-10-06T16:02:13Z"
16+
createdAt: "2025-10-07T13:31:15Z"
1717
features.operators.openshift.io/cnf: "false"
1818
features.operators.openshift.io/cni: "false"
1919
features.operators.openshift.io/csi: "false"
@@ -155,6 +155,14 @@ spec:
155155
- nodes/status
156156
verbs:
157157
- patch
158+
- apiGroups:
159+
- ""
160+
resources:
161+
- pods
162+
verbs:
163+
- get
164+
- list
165+
- watch
158166
- apiGroups:
159167
- ""
160168
resources:

cmd/manager/main.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,15 @@ func main() {
167167
setupLog.Error(err, "unable to create controller", "controller", "Credentials")
168168
os.Exit(1)
169169
}
170+
171+
if err = (&controllers.RuntimeClassReconciler{
172+
Client: mgr.GetClient(),
173+
Scheme: mgr.GetScheme(),
174+
Log: ctrl.Log.WithName("controllers").WithName("RuntimeClass"),
175+
}).SetupWithManager(mgr); err != nil {
176+
setupLog.Error(err, "unable to create controller", "controller", "RuntimeClass")
177+
os.Exit(1)
178+
}
170179
// +kubebuilder:scaffold:builder
171180

172181
setupLog.Info("starting manager")

config/rbac/role.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,14 @@ rules:
1717
- nodes/status
1818
verbs:
1919
- patch
20+
- apiGroups:
21+
- ""
22+
resources:
23+
- pods
24+
verbs:
25+
- get
26+
- list
27+
- watch
2028
- apiGroups:
2129
- ""
2230
resources:

controllers/confidential_handler.go

Lines changed: 117 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,53 @@
11
package controllers
22

33
import (
4+
"context"
5+
"fmt"
6+
7+
corev1 "k8s.io/api/core/v1"
48
k8serrors "k8s.io/apimachinery/pkg/api/errors"
9+
"sigs.k8s.io/controller-runtime/pkg/client"
510
)
611

7-
// When the feature is enabled, handleFeatureConfidential sets config maps to confidential values.
8-
//
9-
// Changes the ImageConfigMap, so that the image creation job will create a confidential image.
10-
// This will happen at the first reconciliation loop, before the image creation job starts.
11-
//
12-
// Changes the peer pods configMap to enable confidential.
13-
// This will happen likely after several reconciliation loops, because it has prerequisites:
12+
const (
13+
// kata-cc runtime class for Confidential Containers (CoCo) on baremetal
14+
kataCCRuntimeClassName = "kata-cc"
15+
kataCCRuntimeClassCpuOverhead = "0.25"
16+
kataCCRuntimeClassMemOverhead = "350Mi"
17+
18+
// TEE node labels
19+
intelTDXNodeLabel = "intel.feature.node.kubernetes.io/tdx"
20+
amdSNPNodeLabel = "amd.feature.node.kubernetes.io/snp"
21+
22+
// RuntimeClass handlers for TEE
23+
kataCCIntelHandler = "kata-cc-intel"
24+
kataCCAmdHandler = "kata-cc-amd"
25+
)
26+
27+
// When the feature is enabled, handleFeatureConfidential configures confidential computing support.
1428
//
15-
// - Peer pods must be enabled in the KataConfig.
16-
// - The peer pods config map must exist.
29+
// For peer pods: sets ImageConfigMap and peer pods configMap to enable confidential images and CVM support.
30+
// For baremetal: creates kata-cc runtime classes with TEE-specific handlers (Intel TDX or AMD SNP).
1731
//
18-
// When the feature is disabled, handleFeatureConfidential resets the config maps to non-confidential values.
32+
// When the feature is disabled, handleFeatureConfidential resets config maps and deletes runtime classes.
1933
func (r *KataConfigOpenShiftReconciler) handleFeatureConfidential(state FeatureGateState) error {
34+
if r.kataConfig.Spec.EnablePeerPods {
35+
if err := r.handleConfidentialPeerPods(state); err != nil {
36+
return err
37+
}
38+
}
2039

21-
// ImageConfigMap
40+
if err := r.handleConfidentialBaremetal(state); err != nil {
41+
return err
42+
}
2243

44+
return nil
45+
}
46+
47+
// handleConfidentialPeerPods configures confidential computing for peer pods deployments.
48+
// It manages ImageConfigMap and peer pods configMap to control confidential images and CVM support.
49+
func (r *KataConfigOpenShiftReconciler) handleConfidentialPeerPods(state FeatureGateState) error {
50+
// ImageConfigMap
2351
if err := InitializeImageGenerator(r.Client); err != nil {
2452
return err
2553
}
@@ -56,8 +84,6 @@ func (r *KataConfigOpenShiftReconciler) handleFeatureConfidential(state FeatureG
5684
}
5785
}
5886

59-
// peer pods config
60-
6187
// Patch peer pods configMap, if it exists.
6288
var peerpodsCMData map[string]string
6389
if state == Enabled {
@@ -76,3 +102,81 @@ func (r *KataConfigOpenShiftReconciler) handleFeatureConfidential(state FeatureG
76102

77103
return nil
78104
}
105+
106+
// handleConfidentialBaremetal configures confidential computing for baremetal deployments.
107+
// It manages kata-cc runtime classes with TEE-specific handlers (Intel TDX or AMD SNP).
108+
func (r *KataConfigOpenShiftReconciler) handleConfidentialBaremetal(state FeatureGateState) error {
109+
if state == Enabled {
110+
r.Log.Info("Creating " + kataCCRuntimeClassName + " runtime class for confidential containers")
111+
112+
handler, nodeLabel, err := r.computeTEEHandlerAndLabel()
113+
if err != nil {
114+
// If peer pods is enabled, just warn and skip baremetal coco
115+
if r.kataConfig.Spec.EnablePeerPods {
116+
r.Log.Info("WARNING: No TEE hardware detected, skipping baremetal confidential containers (peer pods CVM will handle confidential workloads)", "err", err)
117+
return nil
118+
}
119+
// If peer pods disabled, this is an error - user wants baremetal coco but no hardware
120+
r.Log.Info("failed to detect TEE platform", "err", err)
121+
return err
122+
}
123+
124+
// Create kata-cc runtime class restricted to the detected TEE subset
125+
err = r.createRuntimeClass(kataCCRuntimeClassName, kataCCRuntimeClassCpuOverhead, kataCCRuntimeClassMemOverhead, handler, nodeLabel)
126+
if err != nil {
127+
r.Log.Info("Error creating "+kataCCRuntimeClassName+" runtime class", "err", err)
128+
return fmt.Errorf("Error creating "+kataCCRuntimeClassName+" runtime class: %w", err)
129+
}
130+
131+
} else {
132+
r.Log.Info("Deleting " + kataCCRuntimeClassName + " runtime class for confidential containers")
133+
134+
// Delete kata-cc runtime class
135+
err := r.deleteRuntimeClass(kataCCRuntimeClassName)
136+
if err != nil {
137+
r.Log.Info("Error deleting "+kataCCRuntimeClassName+" runtime class", "err", err)
138+
return fmt.Errorf("Error deleting "+kataCCRuntimeClassName+" runtime class: %w", err)
139+
}
140+
}
141+
142+
return nil
143+
}
144+
145+
func (r *KataConfigOpenShiftReconciler) computeTEEHandlerAndLabel() (string, string, error) {
146+
selector, err := r.getKataConfigNodeSelectorAsSelector()
147+
if err != nil {
148+
return "", "", fmt.Errorf("failed to build node selector: %w", err)
149+
}
150+
151+
nodes := &corev1.NodeList{}
152+
listOpts := []client.ListOption{
153+
client.MatchingLabelsSelector{Selector: selector},
154+
}
155+
if err := r.Client.List(context.TODO(), nodes, listOpts...); err != nil {
156+
return "", "", fmt.Errorf("failed to list nodes: %w", err)
157+
}
158+
159+
var hasIntelTDX bool
160+
var hasAmdSNP bool
161+
for _, n := range nodes.Items {
162+
if v, ok := n.Labels[intelTDXNodeLabel]; ok && v == "true" {
163+
hasIntelTDX = true
164+
}
165+
if v, ok := n.Labels[amdSNPNodeLabel]; ok && v == "true" {
166+
hasAmdSNP = true
167+
}
168+
}
169+
170+
if hasIntelTDX && hasAmdSNP {
171+
return "", "", fmt.Errorf("multiple TEE platforms detected; only one per cluster supported")
172+
}
173+
174+
if hasIntelTDX {
175+
return kataCCIntelHandler, intelTDXNodeLabel, nil
176+
}
177+
if hasAmdSNP {
178+
return kataCCAmdHandler, amdSNPNodeLabel, nil
179+
}
180+
181+
return "", "", fmt.Errorf("no TEE platform labels found (expected %s or %s)", intelTDXNodeLabel, amdSNPNodeLabel)
182+
}

controllers/fg_handler.go

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -110,19 +110,17 @@ func (r *KataConfigOpenShiftReconciler) processFeatureGates() error {
110110

111111
// Check which feature gates are enabled in the FG ConfigMap and
112112
// perform the necessary actions
113-
if r.kataConfig.Spec.EnablePeerPods {
114-
if fgStatus.IsEnabled(ConfidentialFeatureGate) {
115-
r.Log.Info("Feature gate is enabled", "featuregate", ConfidentialFeatureGate)
116-
// Perform the necessary actions
117-
if err := r.handleFeatureConfidential(Enabled); err != nil {
118-
return err
119-
}
120-
} else {
121-
r.Log.Info("Feature gate is disabled", "featuregate", ConfidentialFeatureGate)
122-
// Perform the necessary actions
123-
if err := r.handleFeatureConfidential(Disabled); err != nil {
124-
return err
125-
}
113+
if fgStatus.IsEnabled(ConfidentialFeatureGate) {
114+
r.Log.Info("Feature gate is enabled", "featuregate", ConfidentialFeatureGate)
115+
// Perform the necessary actions
116+
if err := r.handleFeatureConfidential(Enabled); err != nil {
117+
return err
118+
}
119+
} else {
120+
r.Log.Info("Feature gate is disabled", "featuregate", ConfidentialFeatureGate)
121+
// Perform the necessary actions
122+
if err := r.handleFeatureConfidential(Disabled); err != nil {
123+
return err
126124
}
127125
}
128126

controllers/migrate.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,13 @@ import (
1616
"context"
1717

1818
corev1 "k8s.io/api/core/v1"
19+
nodeapi "k8s.io/api/node/v1"
1920
k8serrors "k8s.io/apimachinery/pkg/api/errors"
2021
meta "k8s.io/apimachinery/pkg/api/meta"
2122
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
23+
"k8s.io/apimachinery/pkg/types"
2224
"sigs.k8s.io/controller-runtime/pkg/client"
25+
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
2326
)
2427

2528
// migratePeerPodsLimit moves the PeerPodConfig "Limit" value to peer-pods-cm
@@ -81,3 +84,32 @@ func (r *KataConfigOpenShiftReconciler) migratePeerPodsLimit() error {
8184
r.Log.Info("Successfully migrated PeerPodConfig Limit to peer-pods-cm and deleted PeerPodConfig")
8285
return nil
8386
}
87+
88+
// ensureRuntimeClassFinalizers adds finalizers to existing runtime classes that don't have them
89+
func (r *KataConfigOpenShiftReconciler) ensureRuntimeClassFinalizers() error {
90+
runtimeClasses := []string{
91+
kataCCRuntimeClassName,
92+
peerpodsRuntimeClassName,
93+
kataRuntimeClassName,
94+
}
95+
96+
for _, name := range runtimeClasses {
97+
rc := &nodeapi.RuntimeClass{}
98+
err := r.Client.Get(context.TODO(), types.NamespacedName{Name: name}, rc)
99+
if k8serrors.IsNotFound(err) {
100+
continue
101+
}
102+
if err != nil {
103+
return err
104+
}
105+
106+
if !controllerutil.ContainsFinalizer(rc, runtimeClassFinalizerName) {
107+
r.Log.Info("Adding finalizer to existing runtime class", "runtimeClass", name)
108+
controllerutil.AddFinalizer(rc, runtimeClassFinalizerName)
109+
if err := r.Client.Update(context.TODO(), rc); err != nil {
110+
return err
111+
}
112+
}
113+
}
114+
return nil
115+
}

controllers/openshift_controller.go

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,12 @@ func (r *KataConfigOpenShiftReconciler) Reconcile(ctx context.Context, req ctrl.
127127
return ctrl.Result{}, err
128128
}
129129

130+
err = r.ensureRuntimeClassFinalizers()
131+
if err != nil {
132+
r.Log.Info("Failed to ensure runtime class finalizers", "err", err)
133+
return ctrl.Result{}, err
134+
}
135+
130136
err = r.processFeatureGates()
131137
if err != nil {
132138
r.Log.Info("Unable to process feature gates", "err", err)
@@ -772,7 +778,7 @@ func (r *KataConfigOpenShiftReconciler) createDaemonsetForMonitor() error {
772778
return nil
773779
}
774780

775-
func (r *KataConfigOpenShiftReconciler) createRuntimeClass(runtimeClassName string, cpuOverhead string, memoryOverhead string) error {
781+
func (r *KataConfigOpenShiftReconciler) createRuntimeClass(runtimeClassName string, cpuOverhead string, memoryOverhead string, handler string, additionalNodeLabel string) error {
776782

777783
rc := func() *nodeapi.RuntimeClass {
778784
rc := &nodeapi.RuntimeClass{
@@ -781,9 +787,10 @@ func (r *KataConfigOpenShiftReconciler) createRuntimeClass(runtimeClassName stri
781787
Kind: "RuntimeClass",
782788
},
783789
ObjectMeta: metav1.ObjectMeta{
784-
Name: runtimeClassName,
790+
Name: runtimeClassName,
791+
Finalizers: []string{runtimeClassFinalizerName},
785792
},
786-
Handler: runtimeClassName,
793+
Handler: handler,
787794
Overhead: &nodeapi.Overhead{
788795
PodFixed: corev1.ResourceList{
789796
corev1.ResourceCPU: resource.MustParse(cpuOverhead),
@@ -794,6 +801,11 @@ func (r *KataConfigOpenShiftReconciler) createRuntimeClass(runtimeClassName stri
794801

795802
nodeSelector := r.getNodeSelectorAsMap()
796803

804+
// Add additional node label if provided
805+
if additionalNodeLabel != "" {
806+
nodeSelector[additionalNodeLabel] = "true"
807+
}
808+
797809
rc.Scheduling = &nodeapi.Scheduling{
798810
NodeSelector: nodeSelector,
799811
}
@@ -829,6 +841,34 @@ func (r *KataConfigOpenShiftReconciler) createRuntimeClass(runtimeClassName stri
829841
return nil
830842
}
831843

844+
func (r *KataConfigOpenShiftReconciler) deleteRuntimeClass(runtimeClassName string) error {
845+
846+
foundRc := &nodeapi.RuntimeClass{}
847+
err := r.Client.Get(context.TODO(), types.NamespacedName{Name: runtimeClassName}, foundRc)
848+
if err != nil {
849+
if k8serrors.IsNotFound(err) {
850+
return nil
851+
}
852+
return err
853+
}
854+
855+
if err := r.Client.Delete(context.TODO(), foundRc); err != nil {
856+
if k8serrors.IsNotFound(err) {
857+
return nil
858+
}
859+
return err
860+
}
861+
862+
for i, name := range r.kataConfig.Status.RuntimeClasses {
863+
if name == runtimeClassName {
864+
r.kataConfig.Status.RuntimeClasses = append(r.kataConfig.Status.RuntimeClasses[:i], r.kataConfig.Status.RuntimeClasses[i+1:]...)
865+
break
866+
}
867+
}
868+
869+
return nil
870+
}
871+
832872
// "KataConfigNodeSelector" in the names of the following couple of helper
833873
// functions refers to the value of KataConfig.spec.kataConfigPoolSelector,
834874
// i.e. the original selector supplied by the user of KataConfig.
@@ -2197,7 +2237,7 @@ func (r *KataConfigOpenShiftReconciler) deleteScc() error {
21972237
func (r *KataConfigOpenShiftReconciler) postKataInstallation() (*ctrl.Result, error) {
21982238
r.Log.Info("create runtime class")
21992239
r.resetInProgressCondition()
2200-
err := r.createRuntimeClass(kataRuntimeClassName, kataRuntimeClassCpuOverhead, kataRuntimeClassMemOverhead)
2240+
err := r.createRuntimeClass(kataRuntimeClassName, kataRuntimeClassCpuOverhead, kataRuntimeClassMemOverhead, kataRuntimeClassName, "")
22012241
if err != nil {
22022242
return &ctrl.Result{Requeue: true, RequeueAfter: 15 * time.Second}, err
22032243
}

controllers/peerpods.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ func (r *KataConfigOpenShiftReconciler) enablePeerPodsMiscConfigs() error {
321321
}
322322

323323
// Create runtimeClass config for peer-pods
324-
err = r.createRuntimeClass(peerpodsRuntimeClassName, peerpodsRuntimeClassCpuOverhead, peerpodsRuntimeClassMemOverhead)
324+
err = r.createRuntimeClass(peerpodsRuntimeClassName, peerpodsRuntimeClassCpuOverhead, peerpodsRuntimeClassMemOverhead, peerpodsRuntimeClassName, "")
325325
if err != nil {
326326
r.Log.Info("Error in creating kata remote runtimeclass", "err", err)
327327
return err

0 commit comments

Comments
 (0)