Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GCP: introducing provisioning code and tests #2290

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/cloud-api-adaptor/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ require (
github.com/spf13/cobra v1.7.0
golang.org/x/crypto v0.31.0
golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2
google.golang.org/api v0.162.0
google.golang.org/protobuf v1.33.0
k8s.io/api v0.26.2
k8s.io/apimachinery v0.26.2
Expand Down Expand Up @@ -194,7 +195,6 @@ require (
golang.org/x/text v0.21.0 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
google.golang.org/api v0.162.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240213162025-012b6fc9bca9 // indirect
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ configMapGenerator:
namespace: confidential-containers-system
literals:
- CLOUD_PROVIDER="gcp"
#- PAUSE_IMAGE="" # Uncomment and set if you want to use a specific pause image
#- TUNNEL_TYPE="" # Uncomment and set if you want to use a specific tunnel type. Defaults to vxlan
#- VXLAN_PORT="" # Uncomment and set if you want to use a specific vxlan port. Defaults to 4789
- PODVM_IMAGE_NAME="" # set from step "Build Pod VM Image" in gcp/README.md
- GCP_PROJECT_ID="" # set
- GCP_ZONE="" # set e.g. "us-west1-a"
- GCP_MACHINE_TYPE="e2-medium" # replace if needed. caa defaults to e2-medium
- GCP_NETWORK="global/networks/default" # replace if needed.
#- PAUSE_IMAGE="" # Uncomment and set if you want to use a specific pause image
#- TUNNEL_TYPE="" # Uncomment and set if you want to use a specific tunnel type. Defaults to vxlan
#- VXLAN_PORT="" # Uncomment and set if you want to use a specific vxlan port. Defaults to 4789
##TLS_SETTINGS
#- CACERT_FILE="/etc/certificates/ca.crt" # for TLS
#- CERT_FILE="/etc/certificates/client.crt" # for TLS
Expand All @@ -40,7 +40,7 @@ secretGenerator:
- name: peer-pods-secret
namespace: confidential-containers-system
files:
- GCP_CREDENTIALS # make sure this file has the application credentials. You can reuse the Packer creds created in "Build Pod VM Image"
#- GCP_CREDENTIALS # make sure this file has the application credentials. You can reuse the Packer creds created in "Build Pod VM Image"
##TLS_SETTINGS
#- name: certs-for-tls
# namespace: confidential-containers-system
Expand Down
16 changes: 16 additions & 0 deletions src/cloud-api-adaptor/test/e2e/gcp_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//go:build gcp

// (C) Copyright Confidential Containers Contributors
// SPDX-License-Identifier: Apache-2.0

package e2e

import (
"testing"

_ "github.com/confidential-containers/cloud-api-adaptor/src/cloud-api-adaptor/test/provisioner/gcp"
)

// TestFoo is a placeholder test that only logs a greeting, proving the
// gcp provisioner package imports cleanly under the gcp build tag.
func TestFoo(t *testing.T) {
	const msg = "Hello, World!"
	t.Log(msg)
}
337 changes: 337 additions & 0 deletions src/cloud-api-adaptor/test/provisioner/gcp/cluster.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,337 @@
// (C) Copyright Confidential Containers Contributors
// SPDX-License-Identifier: Apache-2.0

package gcp

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"time"

	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/yaml"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
	"k8s.io/client-go/util/retry"
	kconf "sigs.k8s.io/e2e-framework/klient/conf"

	log "github.com/sirupsen/logrus"
	"google.golang.org/api/container/v1"
	"google.golang.org/api/googleapi"
	"google.golang.org/api/option"
)

// GKECluster implements the basic GKE Cluster client operations.
type GKECluster struct {
	clusterName        string             // GKE cluster name (defaults to "e2e-peer-pods" in NewGKECluster)
	clusterVersion     string             // GKE version requested at creation time
	clusterMachineType string             // GCE machine type for the node pool
	credentials        string             // path to a GCP credentials file, passed to option.WithCredentialsFile
	nodeCount          int64              // initial number of nodes in the cluster
	projectID          string             // GCP project that owns the cluster
	zone               string             // GCP zone, e.g. "us-west1-a"
	cluster            *container.Cluster // set by CreateCluster once the cluster is active; nil before then
}

// NewGKECluster creates a new GKECluster with the given properties.
//
// Recognized keys (with defaults where applicable):
//   - cluster_name         (default "e2e-peer-pods")
//   - cluster_version      (default "1.31.4-gke.1256000")
//   - cluster_machine_type (default "n1-standard-1")
//   - node_count           (default "2"; must parse as a positive integer)
//   - project_id, credentials, zone (required; must be non-empty)
//
// It returns an error if a required key is missing/empty or node_count
// is not a positive integer.
func NewGKECluster(properties map[string]string) (*GKECluster, error) {
	defaults := map[string]string{
		"cluster_name":         "e2e-peer-pods",
		"cluster_version":      "1.31.4-gke.1256000",
		"cluster_machine_type": "n1-standard-1",
		"node_count":           "2",
	}

	// Caller-supplied properties override the defaults.
	for key, value := range properties {
		defaults[key] = value
	}

	// Required settings have no sensible default; an empty value is as
	// useless as a missing one, so reject both.
	requiredFields := []string{"project_id", "credentials", "zone"}
	for _, field := range requiredFields {
		if defaults[field] == "" {
			return nil, fmt.Errorf("%s is required", field)
		}
	}

	nodeCount, err := strconv.ParseInt(defaults["node_count"], 10, 64)
	if err != nil {
		return nil, fmt.Errorf("invalid node_count: %w", err)
	}
	if nodeCount <= 0 {
		return nil, fmt.Errorf("invalid node_count: must be positive, got %d", nodeCount)
	}

	return &GKECluster{
		clusterName:        defaults["cluster_name"],
		clusterVersion:     defaults["cluster_version"],
		clusterMachineType: defaults["cluster_machine_type"],
		credentials:        defaults["credentials"],
		nodeCount:          nodeCount,
		projectID:          defaults["project_id"],
		zone:               defaults["zone"],
		cluster:            nil, // populated by CreateCluster
	}, nil
}

// ApplyNodeLabels applies basic worker labels
// ("node.kubernetes.io/worker" and "node-role.kubernetes.io/worker")
// to every node in the cluster, retrying each update on conflict.
// It requires a resolvable kubeconfig (see GetKubeconfigFile).
func (g *GKECluster) ApplyNodeLabels(ctx context.Context) error {
	kubeconfigPath, err := g.GetKubeconfigFile(ctx)
	if err != nil {
		return err
	}

	config, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath)
	if err != nil {
		return fmt.Errorf("failed to build kubeconfig: %w", err)
	}

	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		return fmt.Errorf("failed to create clientset: %w", err)
	}

	nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
	if err != nil {
		return fmt.Errorf("failed to list nodes: %w", err)
	}

	for _, node := range nodes.Items {
		// Re-fetch and update inside the retry loop so we always patch
		// the latest resourceVersion on conflicts.
		err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
			n, err := clientset.CoreV1().Nodes().Get(ctx, node.Name, metav1.GetOptions{})
			if err != nil {
				return fmt.Errorf("failed to get node: %w", err)
			}

			// Guard against a nil label map — writing to a nil map panics.
			if n.Labels == nil {
				n.Labels = map[string]string{}
			}
			n.Labels["node.kubernetes.io/worker"] = ""
			n.Labels["node-role.kubernetes.io/worker"] = ""
			_, err = clientset.CoreV1().Nodes().Update(ctx, n, metav1.UpdateOptions{})
			return err
		})
		if err != nil {
			return fmt.Errorf("labeling node %s: %w", node.Name, err)
		}
		log.Infof("Successfully labeled node %s", node.Name)
	}
	return nil
}

// CreateCluster creates the GKE cluster, waits for it to become active
// (up to 30 minutes), then deploys the containerd-patching DaemonSet and
// applies worker labels to all nodes. The whole operation is bounded by
// a one-hour timeout.
func (g *GKECluster) CreateCluster(ctx context.Context) error {
	ctx, cancel := context.WithTimeout(ctx, time.Hour)
	defer cancel()

	srv, err := container.NewService(
		ctx, option.WithCredentialsFile(g.credentials),
	)
	if err != nil {
		return fmt.Errorf("GKE: container.NewService: %w", err)
	}

	cluster := &container.Cluster{
		Name:                  g.clusterName,
		InitialNodeCount:      g.nodeCount,
		InitialClusterVersion: g.clusterVersion,
		NodeConfig: &container.NodeConfig{
			MachineType: g.clusterMachineType,
			// The default COS image has a read-only filesystem, which
			// prevents the containerd-config patch DaemonSet below.
			ImageType: "UBUNTU_CONTAINERD",
		},
	}

	req := &container.CreateClusterRequest{
		Cluster: cluster,
	}

	op, err := srv.Projects.Zones.Clusters.Create(
		g.projectID, g.zone, req,
	).Context(ctx).Do()
	if err != nil {
		return fmt.Errorf("GKE: Projects.Zones.Clusters.Create: %w", err)
	}

	log.Infof("GKE: Cluster creation operation: %v\n", op.Name)

	g.cluster, err = g.WaitForClusterActive(ctx, 30*time.Minute)
	if err != nil {
		return fmt.Errorf("GKE: waiting for cluster to become active: %w", err)
	}

	// Patch containerd configuration on every node via a DaemonSet.
	// NOTE(review): the manifest path is hard-coded; presumably written
	// by an earlier provisioning step — confirm against the caller.
	const daemonSetPath = "/tmp/containerdDaemonSet.yaml"
	if err := g.DeployDaemonSet(ctx, daemonSetPath); err != nil {
		return fmt.Errorf("GKE: injecting DaemonSet to update containerd: %w", err)
	}
	if err := g.ApplyNodeLabels(ctx); err != nil {
		return fmt.Errorf("GKE: applying node labels: %w", err)
	}
	return nil
}

// DeleteCluster deletes the GKE cluster and blocks until the deletion is
// confirmed (up to 30 minutes), under an overall one-hour timeout.
func (g *GKECluster) DeleteCluster(ctx context.Context) error {
	ctx, cancel := context.WithTimeout(ctx, time.Hour)
	defer cancel()

	svc, err := container.NewService(ctx, option.WithCredentialsFile(g.credentials))
	if err != nil {
		return fmt.Errorf("GKE: container.NewService: %v", err)
	}

	// Kick off the asynchronous deletion operation.
	deleteCall := svc.Projects.Zones.Clusters.Delete(g.projectID, g.zone, g.clusterName)
	op, err := deleteCall.Context(ctx).Do()
	if err != nil {
		return fmt.Errorf("GKE: Projects.Zones.Clusters.Delete: %v", err)
	}

	log.Infof("GKE: Cluster deletion operation: %v\n", op.Name)

	// Block until the cluster is actually gone.
	if err := g.WaitForClusterDeleted(ctx, 30*time.Minute); err != nil {
		return fmt.Errorf("GKE: error waiting for cluster to be deleted: %v", err)
	}
	return nil
}

// DeployDaemonSet is used here because we need to patch the containerd
// config file on every node. It reads a DaemonSet manifest from yamlPath
// and creates it in the manifest's own namespace via the cluster's
// kubeconfig.
func (g *GKECluster) DeployDaemonSet(ctx context.Context, yamlPath string) error {
	kubeconfigPath, err := g.GetKubeconfigFile(ctx)
	if err != nil {
		return err
	}

	// Build the REST config directly from the kubeconfig path, matching
	// ApplyNodeLabels, instead of reading the file and parsing it by hand.
	config, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath)
	if err != nil {
		return fmt.Errorf("failed to load kubeconfig: %w", err)
	}

	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		return fmt.Errorf("failed to create Kubernetes client: %w", err)
	}

	yamlFile, err := os.ReadFile(filepath.Clean(yamlPath))
	if err != nil {
		return fmt.Errorf("failed to read DaemonSet YAML file: %w", err)
	}

	decoder := yaml.NewYAMLOrJSONDecoder(bytes.NewReader(yamlFile), 4096)
	ds := &appsv1.DaemonSet{}
	if err := decoder.Decode(ds); err != nil {
		return fmt.Errorf("failed to decode DaemonSet YAML: %w", err)
	}

	_, err = clientset.AppsV1().DaemonSets(ds.Namespace).Create(ctx, ds, metav1.CreateOptions{})
	if err != nil {
		return fmt.Errorf("failed to deploy DaemonSet: %w", err)
	}

	log.Info("DaemonSet deployed successfully!")
	return nil
}

// GetKubeconfigFile fetches credentials for the cluster via
// "gcloud container clusters get-credentials" and returns the path to the
// resolved kubeconfig file. CreateCluster must have succeeded first.
func (g *GKECluster) GetKubeconfigFile(ctx context.Context) (string, error) {
	// Fail fast before shelling out: fetching credentials only makes
	// sense for a cluster we know exists. (The original checked this
	// after running gcloud, wasting a subprocess on the error path.)
	if g.cluster == nil {
		return "", fmt.Errorf("cluster not found; call CreateCluster() first")
	}

	cmd := exec.CommandContext(ctx, "gcloud", "container", "clusters", "get-credentials", g.clusterName, "--zone", g.zone, "--project", g.projectID)
	output, err := cmd.CombinedOutput()
	if err != nil {
		return "", fmt.Errorf("getting cluster credentials: %w (output: %s)", err, output)
	}

	// gcloud writes credentials into the default kubeconfig location;
	// resolve it and make sure the file actually exists.
	kubeconfigPath := kconf.ResolveKubeConfigFile()
	if _, err := os.Stat(kubeconfigPath); err != nil {
		return "", fmt.Errorf("resolving kubeconfig file: %w", err)
	}
	return kubeconfigPath, nil
}

// WaitForClusterActive polls the cluster status every five seconds until
// it reports RUNNING, returning the cluster object, or fails once
// activationTimeout elapses.
func (g *GKECluster) WaitForClusterActive(
	ctx context.Context, activationTimeout time.Duration,
) (*container.Cluster, error) {
	srv, err := container.NewService(
		ctx, option.WithCredentialsFile(g.credentials),
	)
	if err != nil {
		return nil, fmt.Errorf("GKE: container.NewService: %w", err)
	}

	timeoutCtx, cancel := context.WithTimeout(ctx, activationTimeout)
	defer cancel()

	ticker := time.NewTicker(5 * time.Second)
	defer ticker.Stop()

	for {
		select {
		case <-timeoutCtx.Done():
			return nil, fmt.Errorf("GKE: timeout waiting for cluster to become active")
		case <-ticker.C:
			// Use timeoutCtx (not the parent ctx) so a hung API call is
			// also bounded by activationTimeout.
			cluster, err := srv.Projects.Zones.Clusters.Get(g.projectID, g.zone, g.clusterName).Context(timeoutCtx).Do()
			if err != nil {
				return nil, fmt.Errorf("GKE: Projects.Zones.Clusters.Get: %w", err)
			}

			if cluster.Status == "RUNNING" {
				log.Info("GKE: Cluster is now active")
				return cluster, nil
			}

			log.Info("GKE: Waiting for cluster to become active...")
		}
	}
}

// WaitForClusterDeleted polls every five seconds until a Get on the
// cluster returns 404 (meaning the deletion completed), or fails once
// activationTimeout elapses.
func (g *GKECluster) WaitForClusterDeleted(
	ctx context.Context, activationTimeout time.Duration,
) error {
	srv, err := container.NewService(
		ctx, option.WithCredentialsFile(g.credentials),
	)
	if err != nil {
		return fmt.Errorf("GKE: container.NewService: %w", err)
	}

	timeoutCtx, cancel := context.WithTimeout(ctx, activationTimeout)
	defer cancel()

	ticker := time.NewTicker(5 * time.Second)
	defer ticker.Stop()

	for {
		select {
		case <-timeoutCtx.Done():
			return fmt.Errorf("GKE: timeout waiting for cluster deletion")
		case <-ticker.C:
			// Use timeoutCtx so a hung API call is also bounded by
			// activationTimeout.
			_, err := srv.Projects.Zones.Clusters.Get(g.projectID, g.zone, g.clusterName).Context(timeoutCtx).Do()
			if err != nil {
				// A 404 means the cluster is gone — that's success.
				// errors.As also matches wrapped *googleapi.Error values,
				// unlike a bare type assertion.
				var gerr *googleapi.Error
				if errors.As(err, &gerr) && gerr.Code == 404 {
					log.Info("GKE: Cluster deleted successfully")
					return nil
				}
				return fmt.Errorf("GKE: Projects.Zones.Clusters.Get: %w", err)
			}

			log.Info("GKE: Waiting for cluster to be deleted...")
		}
	}
}
Loading
Loading