feat: add production mode to k8s (#1963)
## Description:
1. Services restart on failure, as they already do with the Docker backend
2. /kurtosis-data/ now lives in a PersistentVolume (PV) on k8s, so the APIC (API container) can restart without losing data

## Is this change user facing?
YES
h4ck3rk3y authored Dec 15, 2023
1 parent b2fd9f2 commit b0e27e6
Showing 12 changed files with 149 additions and 69 deletions.
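
The heart of the change is a single mapping from the new production-mode flag to a Kubernetes pod restart policy, applied when user services are started (see the `StartRegisteredUserServices` hunk below). A minimal sketch of that mapping; the helper name is illustrative, not part of the commit:

```go
package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
)

// restartPolicyForMode is a hypothetical helper mirroring the logic this
// commit adds to StartRegisteredUserServices: production enclaves restart
// services on failure, everything else never restarts.
func restartPolicyForMode(productionMode bool) apiv1.RestartPolicy {
	if productionMode {
		return apiv1.RestartPolicyOnFailure
	}
	return apiv1.RestartPolicyNever
}

func main() {
	fmt.Println(restartPolicyForMode(true))  // OnFailure
	fmt.Println(restartPolicyForMode(false)) // Never
}
```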
@@ -14,7 +14,6 @@ const (

containerTypeLabelKeyStr = labelNamespaceStr + "container-type"
volumeTypeLabelKeyStr = labelNamespaceStr + "volume-type"
- enclaveTypeLabelKeyStr = labelNamespaceStr + "enclave-type"

// A label to identify a Kurtosis resource (e.g. network, container, etc.) by its id
idLabelKeyStr = labelNamespaceStr + "id"
@@ -434,6 +434,8 @@ func createEnginePod(
engineContainers,
engineVolumes,
serviceAccountName,
+ // Engine doesn't auto restart
+ apiv1.RestartPolicyNever,
)
if err != nil {
return nil, nil, stacktrace.Propagate(err, "An error occurred while creating the pod with name '%s' in namespace '%s' with image '%s'", enginePodName, namespace, containerImageAndTag)
@@ -3,6 +3,7 @@ package kubernetes_kurtosis_backend
import (
"context"
"io"
+ apiv1 "k8s.io/api/core/v1"

"github.com/kurtosis-tech/kurtosis/container-engine-lib/lib/backend_impls/kubernetes/kubernetes_kurtosis_backend/engine_functions"
"github.com/kurtosis-tech/kurtosis/container-engine-lib/lib/backend_impls/kubernetes/kubernetes_kurtosis_backend/shared_helpers"
@@ -26,6 +27,7 @@ import (

const (
isResourceInformationComplete = false
+ noProductionMode = false
)

type KubernetesKurtosisBackend struct {
@@ -39,6 +41,9 @@ type KubernetesKurtosisBackend struct {

// Will only be filled out for the API container
apiContainerModeArgs *shared_helpers.ApiContainerModeArgs
+
+ // Whether services should be restarted on failure (production mode)
+ productionMode bool
}

func (backend *KubernetesKurtosisBackend) DumpKurtosis(ctx context.Context, outputDirpath string) error {
@@ -52,6 +57,7 @@ func newKubernetesKurtosisBackend(
cliModeArgs *shared_helpers.CliModeArgs,
engineServerModeArgs *shared_helpers.EngineServerModeArgs,
apiContainerModeArgs *shared_helpers.ApiContainerModeArgs,
+ productionMode bool,
) *KubernetesKurtosisBackend {
objAttrsProvider := object_attributes_provider.GetKubernetesObjectAttributesProvider()
return &KubernetesKurtosisBackend{
@@ -60,6 +66,7 @@
cliModeArgs: cliModeArgs,
engineServerModeArgs: engineServerModeArgs,
apiContainerModeArgs: apiContainerModeArgs,
+ productionMode: productionMode,
}
}

@@ -68,13 +75,15 @@ func NewAPIContainerKubernetesKurtosisBackend(
ownEnclaveUuid enclave.EnclaveUUID,
ownNamespaceName string,
storageClassName string,
+ productionMode bool,
) *KubernetesKurtosisBackend {
modeArgs := shared_helpers.NewApiContainerModeArgs(ownEnclaveUuid, ownNamespaceName, storageClassName)
return newKubernetesKurtosisBackend(
kubernetesManager,
nil,
nil,
modeArgs,
+ productionMode,
)
}

@@ -87,6 +96,7 @@
nil,
modeArgs,
nil,
+ noProductionMode,
)
}

@@ -99,28 +109,10 @@
modeArgs,
nil,
nil,
+ noProductionMode,
)
}

- func NewKubernetesKurtosisBackend(
- kubernetesManager *kubernetes_manager.KubernetesManager,
- // TODO Remove the necessity for these different args by splitting the *KubernetesKurtosisBackend into multiple
- // backends per consumer, e.g. APIContainerKurtosisBackend, CLIKurtosisBackend, EngineKurtosisBackend, etc.
- // This can only happen once the CLI no longer uses the same functionality as API container, engine, etc. though
- cliModeArgs *shared_helpers.CliModeArgs,
- engineServerModeArgs *shared_helpers.EngineServerModeArgs,
- apiContainerModeargs *shared_helpers.ApiContainerModeArgs,
- ) *KubernetesKurtosisBackend {
- objAttrsProvider := object_attributes_provider.GetKubernetesObjectAttributesProvider()
- return &KubernetesKurtosisBackend{
- kubernetesManager: kubernetesManager,
- objAttrsProvider: objAttrsProvider,
- cliModeArgs: cliModeArgs,
- engineServerModeArgs: engineServerModeArgs,
- apiContainerModeArgs: apiContainerModeargs,
- }
- }

func (backend *KubernetesKurtosisBackend) FetchImage(ctx context.Context, image string, downloadMode image_download_mode.ImageDownloadMode) (bool, string, error) {
logrus.Warnf("FetchImage isn't implemented for Kubernetes yet")
return false, "", nil
@@ -261,14 +253,20 @@ func (backend *KubernetesKurtosisBackend) StartRegisteredUserServices(
map[service.ServiceUUID]error,
error,
) {
+ restartPolicy := apiv1.RestartPolicyNever
+ if backend.productionMode {
+ restartPolicy = apiv1.RestartPolicyOnFailure
+ }
+
successfullyStartedServices, failedServices, err := user_services_functions.StartRegisteredUserServices(
ctx,
enclaveUuid,
services,
backend.cliModeArgs,
backend.apiContainerModeArgs,
backend.engineServerModeArgs,
- backend.kubernetesManager)
+ backend.kubernetesManager,
+ restartPolicy)
if err != nil {
var serviceUuids []service.ServiceUUID
for serviceUuid := range services {
@@ -36,6 +36,8 @@ const (
timeBetweenWaitForApiContainerContainerAvailabilityRetries = 1 * time.Second

enclaveDataDirVolumeName = "enclave-data"
+
+ enclaveDataDirVolumeSize int64 = 1 * 1024 * 1024 * 1024 // 1 GiB minimum size on Kubernetes
)

var noWait *port_spec.Wait = nil
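
The size constant is a plain int64 of bytes (1 * 1024 * 1024 * 1024 = 1 GiB); on the Kubernetes side a PVC must request capacity as a resource.Quantity. The `CreatePersistentVolumeClaim` wrapper used below presumably performs a conversion along these lines; this is a hedged sketch, not the wrapper's actual code:

```go
package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	const enclaveDataDirVolumeSize int64 = 1 * 1024 * 1024 * 1024 // 1 GiB, as in the diff

	// A PVC requests its capacity as a resource.Quantity; BinarySI keeps
	// the value in power-of-two units (Gi, Mi, ...).
	quantity := resource.NewQuantity(enclaveDataDirVolumeSize, resource.BinarySI)

	requests := apiv1.ResourceList{
		apiv1.ResourceStorage: *quantity,
	}
	fmt.Println(requests.Storage().String()) // prints "1Gi"
}
```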
@@ -412,13 +414,44 @@ func (backend *KubernetesKurtosisBackend) CreateAPIContainer(
return nil, stacktrace.Propagate(err, "An error occurred getting container ports from the API container's private port specs")
}

+ volumeAttrs, err := enclaveAttributesProvider.ForEnclaveDataDirVolume()
+ if err != nil {
+ return nil, stacktrace.Propagate(err, "An error occurred creating the labels for the enclave data dir volume")
+ }
+
+ volumeLabelsStrs := map[string]string{}
+ for key, value := range volumeAttrs.GetLabels() {
+ volumeLabelsStrs[key.GetString()] = value.GetString()
+ }
+ if _, err = backend.kubernetesManager.CreatePersistentVolumeClaim(ctx, enclaveNamespaceName, enclaveDataDirVolumeName, volumeLabelsStrs, enclaveDataDirVolumeSize); err != nil {
+ return nil, stacktrace.Propagate(err, "An error occurred creating the persistent volume claim '%s' for the enclave data dir volume", enclaveDataDirVolumeName)
+ }
+ shouldDeleteVolumeClaim := true
+
+ defer func() {
+ if !shouldDeleteVolumeClaim {
+ return
+ }
+ if err := backend.kubernetesManager.RemovePersistentVolumeClaim(context.Background(), enclaveNamespaceName, enclaveDataDirVolumeName); err != nil {
+ logrus.Warnf(
+ "Creating pod didn't finish successfully - we tried removing the PVC %v but failed with error %v",
+ enclaveDataDirVolumeName,
+ err,
+ )
+ logrus.Warnf("You'll need to clean up volume claim '%v' manually!", enclaveDataDirVolumeName)
+ }
+ }()
+
apiContainerContainers, apiContainerVolumes, err := getApiContainerContainersAndVolumes(image, containerPorts, envVarsWithOwnIp, enclaveDataVolumeDirpath)
if err != nil {
return nil, stacktrace.Propagate(err, "An error occurred getting API containers and volumes")
}

apiContainerInitContainers := []apiv1.Container{}

+ // Data is always persistent, so we can always restart like Docker
+ apiContainerRestartPolicy := apiv1.RestartPolicyOnFailure

// Create pods with api container containers and volumes in Kubernetes
apiContainerPod, err := backend.kubernetesManager.CreatePod(
ctx,
@@ -430,6 +463,7 @@
apiContainerContainers,
apiContainerVolumes,
apiContainerServiceAccountName,
+ apiContainerRestartPolicy,
)
if err != nil {
return nil, stacktrace.Propagate(err, "An error occurred while creating the pod with name '%s' in namespace '%s' with image '%s'", apiContainerPodName, enclaveNamespaceName, image)
@@ -481,6 +515,7 @@ func (backend *KubernetesKurtosisBackend) CreateAPIContainer(
shouldRemoveServiceAccount = false
shouldRemovePod = false
shouldRemoveService = false
+ shouldDeleteVolumeClaim = false
return resultApiContainer, nil
}
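
The `shouldDeleteVolumeClaim` flag above follows the undo-flag idiom this function already uses for the service account, pod, and service: create a resource, register a deferred cleanup guarded by a flag, and disarm every flag only once the whole function has succeeded, so any early return rolls back everything created so far. A stripped-down sketch of the idiom, with hypothetical create/delete helpers standing in for calls like CreatePersistentVolumeClaim / RemovePersistentVolumeClaim:

```go
package main

import "fmt"

func createResource(name string) error { fmt.Println("created", name); return nil }
func deleteResource(name string)       { fmt.Println("rolled back", name) }

func provision() error {
	if err := createResource("pvc"); err != nil {
		return err
	}
	shouldDeletePvc := true
	defer func() {
		// Runs on every exit path; only cleans up if never disarmed.
		if shouldDeletePvc {
			deleteResource("pvc")
		}
	}()

	if err := createResource("pod"); err != nil {
		return err // the deferred cleanup removes the PVC
	}
	shouldDeletePod := true
	defer func() {
		if shouldDeletePod {
			deleteResource("pod")
		}
	}()

	// Everything succeeded: disarm all cleanups so the resources survive.
	shouldDeletePvc = false
	shouldDeletePod = false
	return nil
}

func main() {
	if err := provision(); err != nil {
		fmt.Println("provisioning failed:", err)
	}
}
```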

@@ -995,7 +1030,7 @@ func getApiContainerContainersAndVolumes(
enclaveDataVolumeDirpath string,
) (
resultContainers []apiv1.Container,
- resultPodVolumes []apiv1.Volume,
+ resultVolumes []apiv1.Volume,
resultErr error,
) {
if _, found := envVars[ApiContainerOwnNamespaceNameEnvVar]; found {
@@ -1047,38 +1082,38 @@
{
Name: enclaveDataDirVolumeName,
VolumeSource: apiv1.VolumeSource{
HostPath: nil,
- EmptyDir: &apiv1.EmptyDirVolumeSource{
- Medium: "",
- SizeLimit: nil,
- },
+ EmptyDir: nil,
GCEPersistentDisk: nil,
AWSElasticBlockStore: nil,
GitRepo: nil,
Secret: nil,
NFS: nil,
ISCSI: nil,
Glusterfs: nil,
- PersistentVolumeClaim: nil,
+ PersistentVolumeClaim: &apiv1.PersistentVolumeClaimVolumeSource{
+ ClaimName: enclaveDataDirVolumeName,
+ ReadOnly: false,
+ },
RBD: nil,
FlexVolume: nil,
Cinder: nil,
CephFS: nil,
Flocker: nil,
DownwardAPI: nil,
FC: nil,
AzureFile: nil,
ConfigMap: nil,
VsphereVolume: nil,
Quobyte: nil,
AzureDisk: nil,
PhotonPersistentDisk: nil,
Projected: nil,
PortworxVolume: nil,
ScaleIO: nil,
StorageOS: nil,
CSI: nil,
Ephemeral: nil,
},
},
}
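
For readers less familiar with the client-go types: a pod consumes a PVC by declaring a `Volume` whose source points at the claim, and containers mount that volume by name. A minimal self-contained sketch of the shape built above; the mount path comes from the commit description, while in the real code it is supplied by enclaveDataVolumeDirpath:

```go
package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
)

func main() {
	// Volume backed by the enclave-data PVC, as in the diff above.
	vol := apiv1.Volume{
		Name: "enclave-data",
		VolumeSource: apiv1.VolumeSource{
			PersistentVolumeClaim: &apiv1.PersistentVolumeClaimVolumeSource{
				ClaimName: "enclave-data",
				ReadOnly:  false,
			},
		},
	}
	// The API container mounts it; "/kurtosis-data" is the path the
	// commit description mentions.
	mount := apiv1.VolumeMount{
		Name:      vol.Name,
		MountPath: "/kurtosis-data",
	}
	fmt.Printf("volume %q mounted at %s\n", vol.Name, mount.MountPath)
}
```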
@@ -74,6 +74,7 @@ func GetEngineServerBackend(
func GetApiContainerBackend(
ctx context.Context,
storageClass string,
+ productionMode bool,
) (backend_interface.KurtosisBackend, error) {
kubernetesConfig, err := rest.InClusterConfig()
if err != nil {
@@ -107,6 +108,7 @@
enclaveId,
namespaceName,
storageClass,
+ productionMode,
), nil
}

@@ -74,7 +74,7 @@ func preparePersistentDirectoriesResources(
serviceMountpointsToPersistentKey map[string]service_directory.PersistentDirectory,
kubernetesManager *kubernetes_manager.KubernetesManager,
) (map[string]*kubernetesVolumeWithClaim, error) {
- shouldDeleteVolumesAndClaimsCreated := true
+ shouldDeleteVolumeClaims := true
volumeClaimsCreated := map[string]*apiv1.PersistentVolumeClaim{}

persistentVolumesAndClaims := map[string]*kubernetesVolumeWithClaim{}
@@ -109,7 +109,7 @@
}

defer func() {
- if !shouldDeleteVolumesAndClaimsCreated {
+ if !shouldDeleteVolumeClaims {
return
}
for volumeClaimNameStr := range volumeClaimsCreated {
@@ -125,6 +125,6 @@
}
}()

- shouldDeleteVolumesAndClaimsCreated = false
+ shouldDeleteVolumeClaims = false
return persistentVolumesAndClaims, nil
}
@@ -125,6 +125,7 @@ func StartRegisteredUserServices(
apiContainerModeArgs *shared_helpers.ApiContainerModeArgs,
engineServerModeArgs *shared_helpers.EngineServerModeArgs,
kubernetesManager *kubernetes_manager.KubernetesManager,
+ restartPolicy apiv1.RestartPolicy,
) (
map[service.ServiceUUID]*service.Service,
map[service.ServiceUUID]error,
@@ -184,7 +185,8 @@
enclaveUuid,
serviceRegisteredThatCanBeStarted,
existingObjectsAndResources,
- kubernetesManager)
+ kubernetesManager,
+ restartPolicy)
if err != nil {
return nil, nil, stacktrace.Propagate(err, "An error occurred while trying to start services in parallel.")
}
@@ -246,6 +248,7 @@ func runStartServiceOperationsInParallel(
services map[service.ServiceUUID]*service.ServiceConfig,
servicesObjectsAndResources map[service.ServiceUUID]*shared_helpers.UserServiceObjectsAndKubernetesResources,
kubernetesManager *kubernetes_manager.KubernetesManager,
+ restartPolicy apiv1.RestartPolicy,
) (
map[service.ServiceUUID]*service.Service,
map[service.ServiceUUID]error,
@@ -259,7 +262,8 @@
config,
servicesObjectsAndResources,
enclaveUUID,
- kubernetesManager)
+ kubernetesManager,
+ restartPolicy)
}

successfulServiceObjs, failedOperations := operation_parallelizer.RunOperationsInParallel(startServiceOperations)
@@ -292,7 +296,8 @@ func createStartServiceOperation(
serviceConfig *service.ServiceConfig,
servicesObjectsAndResources map[service.ServiceUUID]*shared_helpers.UserServiceObjectsAndKubernetesResources,
enclaveUuid enclave.EnclaveUUID,
- kubernetesManager *kubernetes_manager.KubernetesManager) operation_parallelizer.Operation {
+ kubernetesManager *kubernetes_manager.KubernetesManager,
+ restartPolicy apiv1.RestartPolicy) operation_parallelizer.Operation {

return func() (interface{}, error) {
filesArtifactsExpansion := serviceConfig.GetFilesArtifactsExpansion()
@@ -413,6 +418,7 @@
podContainers,
podVolumes,
userServiceServiceAccountName,
+ restartPolicy,
)
if err != nil {
return nil, stacktrace.Propagate(err, "An error occurred creating pod '%v' using image '%v'", podName, containerImageName)
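
createStartServiceOperation returns a closure (an operation_parallelizer.Operation, i.e. `func() (interface{}, error)` as the hunk above shows), so the restart policy is captured once per service and each operation can then run on its own goroutine. A minimal sketch of that capture-and-run-in-parallel shape, with simplified stand-in types rather than the real Kurtosis ones:

```go
package main

import (
	"fmt"
	"sync"
)

// operation mirrors operation_parallelizer.Operation from the diff.
type operation func() (interface{}, error)

func createStartServiceOperation(serviceName string, restartPolicy string) operation {
	// restartPolicy is captured by the closure, just as in the diff.
	return func() (interface{}, error) {
		return fmt.Sprintf("started %s with restartPolicy=%s", serviceName, restartPolicy), nil
	}
}

func main() {
	ops := map[string]operation{}
	for _, name := range []string{"svc-a", "svc-b", "svc-c"} {
		ops[name] = createStartServiceOperation(name, "OnFailure")
	}

	var wg sync.WaitGroup
	results := make(chan interface{}, len(ops))
	for _, op := range ops {
		wg.Add(1)
		go func(op operation) {
			defer wg.Done()
			if res, err := op(); err == nil {
				results <- res
			}
		}(op)
	}
	wg.Wait()
	close(results)
	for res := range results {
		fmt.Println(res)
	}
}
```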