Skip to content

Commit

Permalink
Merge branch 'master' into andyzhangx/upgrade-file-driver-v1.31.3
Browse files Browse the repository at this point in the history
  • Loading branch information
andyzhangx authored Jan 29, 2025
2 parents 14f8d08 + b089651 commit 629a9a1
Show file tree
Hide file tree
Showing 303 changed files with 74,138 additions and 15,202 deletions.
2 changes: 1 addition & 1 deletion aks-node-controller/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ proto-generate:
.PHONY: proto-lint
proto-lint:
@($(BUF) lint)
@($(BUF) breaking --against '../.git#branch=dev,subdir=aks-node-controller') # TODO: change to master
@($(BUF) breaking --against '../.git#branch=master,subdir=aks-node-controller')
37 changes: 32 additions & 5 deletions aks-node-controller/parser/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,13 @@ var (
containerdConfigTemplateText string
//nolint:gochecknoglobals
containerdConfigTemplate = template.Must(
template.New("containerdconfigforaksnodeconfig").Funcs(getFuncMapForContainerdConfigTemplate()).Parse(containerdConfigTemplateText),
template.New("containerdconfig").Funcs(getFuncMapForContainerdConfigTemplate()).Parse(containerdConfigTemplateText),
)
//go:embed templates/containerd_no_GPU.toml.gtpl
containerdConfigNoGPUTemplateText string
//nolint:gochecknoglobals
containerdConfigNoGPUTemplate = template.Must(
template.New("nogpucontainerdconfig").Funcs(getFuncMapForContainerdConfigTemplate()).Parse(containerdConfigNoGPUTemplateText),
)
)

Expand Down Expand Up @@ -139,26 +145,47 @@ func getKubenetTemplate() string {
return base64.StdEncoding.EncodeToString(kubenetTemplateContent)
}

func getContainerdConfig(aksnodeconfig *aksnodeconfigv1.Configuration) string {
// getContainerdConfigBase64 returns the base64 encoded containerd config rendered with noGPU set to false,
// i.e. from containerdConfigTemplate rather than the no-GPU template.
func getContainerdConfigBase64(aksnodeconfig *aksnodeconfigv1.Configuration) string {
if aksnodeconfig == nil {
return ""
}

containerdConfig, err := containerdConfigFromAKSNodeConfig(aksnodeconfig)
containerdConfig, err := containerdConfigFromAKSNodeConfig(aksnodeconfig, false)
if err != nil {
return fmt.Sprintf("error getting containerd config from node bootstrap variables: %v", err)
}

return base64.StdEncoding.EncodeToString([]byte(containerdConfig))
}

func containerdConfigFromAKSNodeConfig(aksnodeconfig *aksnodeconfigv1.Configuration) (string, error) {
// getNoGPUContainerdConfigBase64 renders the containerd config with the noGPU
// flag set to true and returns the result base64 encoded (standard encoding).
//
// It returns an empty string when aksnodeconfig is nil. If rendering fails, the
// returned string is a plain-text error message rather than base64 data.
func getNoGPUContainerdConfigBase64(aksnodeconfig *aksnodeconfigv1.Configuration) string {
if aksnodeconfig == nil {
return ""
}

// The second argument is the noGPU flag; true requests the no-GPU variant of the config.
containerdConfig, err := containerdConfigFromAKSNodeConfig(aksnodeconfig, true)
if err != nil {
// The error is reported through the return value itself, unencoded.
return fmt.Sprintf("error getting No GPU containerd config from node bootstrap variables: %v", err)
}

return base64.StdEncoding.EncodeToString([]byte(containerdConfig))
}

func containerdConfigFromAKSNodeConfig(aksnodeconfig *aksnodeconfigv1.Configuration, noGPU bool) (string, error) {
if aksnodeconfig == nil {
return "", fmt.Errorf("AKSNodeConfig is nil")
}

// the containerd config template is different based on whether the node is with GPU or not.
_template := containerdConfigTemplate
if noGPU {
_template = containerdConfigNoGPUTemplate
}

var buffer bytes.Buffer
if err := containerdConfigTemplate.Execute(&buffer, aksnodeconfig); err != nil {
if err := _template.Execute(&buffer, aksnodeconfig); err != nil {
return "", fmt.Errorf("error executing containerd config template for AKSNodeConfig: %w", err)
}

Expand Down
66 changes: 64 additions & 2 deletions aks-node-controller/parser/helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,21 +249,83 @@ func Test_createSortedKeyValueInt32Pairs(t *testing.T) {
func Test_getContainerdConfig(t *testing.T) {
type args struct {
aksnodeconfig *aksnodeconfigv1.Configuration
noGpu bool
}
tests := []struct {
name string
args args
want string
}{
{
name: "Default Configuration",
name: "Default Containerd Configurations",
args: args{
aksnodeconfig: &aksnodeconfigv1.Configuration{
NeedsCgroupv2: ToPtr(true),
},
},
want: base64.StdEncoding.EncodeToString([]byte(`version = 2
oom_score = 0
[plugins."io.containerd.grpc.v1.cri"]
sandbox_image = ""
[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "runc"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
BinaryName = "/usr/bin/runc"
SystemdCgroup = true
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.untrusted]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.untrusted.options]
BinaryName = "/usr/bin/runc"
[plugins."io.containerd.grpc.v1.cri".registry.headers]
X-Meta-Source-Client = ["azure/aks"]
[metrics]
address = "0.0.0.0:10257"
`)),
},
{
name: "Containerd Configurations with bool noGpu set to false",
args: args{
aksnodeconfig: &aksnodeconfigv1.Configuration{
NeedsCgroupv2: ToPtr(true),
GpuConfig: &aksnodeconfigv1.GpuConfig{
EnableNvidia: ToPtr(true),
},
},
noGpu: false,
},
want: base64.StdEncoding.EncodeToString([]byte(`version = 2
oom_score = 0
[plugins."io.containerd.grpc.v1.cri"]
sandbox_image = ""
[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "nvidia-container-runtime"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-container-runtime]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-container-runtime.options]
BinaryName = "/usr/bin/nvidia-container-runtime"
SystemdCgroup = true
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.untrusted]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.untrusted.options]
BinaryName = "/usr/bin/nvidia-container-runtime"
[plugins."io.containerd.grpc.v1.cri".registry.headers]
X-Meta-Source-Client = ["azure/aks"]
[metrics]
address = "0.0.0.0:10257"
`)),
},
{
name: "Containerd Configurations with bool noGpu set to true",
args: args{
aksnodeconfig: &aksnodeconfigv1.Configuration{
NeedsCgroupv2: ToPtr(true),
},
noGpu: true,
},
want: base64.StdEncoding.EncodeToString([]byte(`version = 2
oom_score = 0
[plugins."io.containerd.grpc.v1.cri"]
sandbox_image = ""
[plugins."io.containerd.grpc.v1.cri".containerd]
Expand All @@ -286,7 +348,7 @@ oom_score = 0
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := getContainerdConfig(tt.args.aksnodeconfig); got != tt.want {
if got := getContainerdConfigBase64(tt.args.aksnodeconfig); got != tt.want {
t.Errorf("getContainerdConfig() = %v, want %v", got, tt.want)
}
})
Expand Down
3 changes: 2 additions & 1 deletion aks-node-controller/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@ func getCSEEnv(config *aksnodeconfigv1.Configuration) map[string]string {
"AZURE_ENVIRONMENT_FILEPATH": getAzureEnvironmentFilepath(config),
"KUBE_CA_CRT": config.GetKubernetesCaCert(),
"KUBENET_TEMPLATE": getKubenetTemplate(),
"CONTAINERD_CONFIG_CONTENT": getContainerdConfig(config),
"CONTAINERD_CONFIG_CONTENT": getContainerdConfigBase64(config),
"CONTAINERD_CONFIG_NO_GPU_CONTENT": getNoGPUContainerdConfigBase64(config),
"IS_KATA": fmt.Sprintf("%v", config.GetIsKata()),
"ARTIFACT_STREAMING_ENABLED": fmt.Sprintf("%v", config.GetEnableArtifactStreaming()),
"SYSCTL_CONTENT": getSysctlContent(config.GetCustomLinuxOsConfig().GetSysctlConfig()),
Expand Down
4 changes: 2 additions & 2 deletions aks-node-controller/parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ func TestBuildCSECmd(t *testing.T) {
validator: func(cmd *exec.Cmd) {
vars := environToMap(cmd.Env)
assert.Equal(t, "false", vars["GPU_NODE"])
assert.NotEmpty(t, vars["CONTAINERD_CONFIG_CONTENT"])
assert.NotEmpty(t, vars["CONTAINERD_CONFIG_NO_GPU_CONTENT"])
// Ensure the containerd config does not use the
// nvidia container runtime when skipping the
// GPU driver install, since it will fail to run even non-GPU
// pods, as it will not be installed.
containerdConfigFileContent, err := getBase64DecodedValue([]byte(vars["CONTAINERD_CONFIG_CONTENT"]))
containerdConfigFileContent, err := getBase64DecodedValue([]byte(vars["CONTAINERD_CONFIG_NO_GPU_CONTENT"]))
require.NoError(t, err)
expectedShimConfig := `version = 2
oom_score = 0
Expand Down
105 changes: 105 additions & 0 deletions aks-node-controller/parser/templates/containerd_no_GPU.toml.gtpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
version = 2
oom_score = 0{{if getHasDataDir .KubeletConfig}}
root = "{{.KubeletConfig.GetContainerDataDir}}"{{- end}}
[plugins."io.containerd.grpc.v1.cri"]
sandbox_image = "{{ .KubeBinaryConfig.GetPodInfraContainerImageUrl }}"
[plugins."io.containerd.grpc.v1.cri".containerd]
{{- if .TeleportConfig.GetStatus }}
snapshotter = "teleportd"
disable_snapshot_annotations = false
{{- else}}
{{- if .GetIsKata }}
disable_snapshot_annotations = false
{{- end}}
{{- end}}
{{- if .GetEnableArtifactStreaming }}
snapshotter = "overlaybd"
disable_snapshot_annotations = false
{{- end}}
default_runtime_name = "runc"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
BinaryName = "/usr/bin/runc"
{{- if .NeedsCgroupv2 }}
SystemdCgroup = true
{{- end}}
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.untrusted]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.untrusted.options]
BinaryName = "/usr/bin/runc"
{{- if getIsKrustlet .GetWorkloadRuntime }}
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.spin]
runtime_type = "io.containerd.spin.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.slight]
runtime_type = "io.containerd.slight-v0-3-0.v1"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.spin-v0-3-0]
runtime_type = "io.containerd.spin-v0-3-0.v1"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.slight-v0-3-0]
runtime_type = "io.containerd.slight-v0-3-0.v1"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.spin-v0-5-1]
runtime_type = "io.containerd.spin-v0-5-1.v1"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.slight-v0-5-1]
runtime_type = "io.containerd.slight-v0-5-1.v1"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.spin-v0-8-0]
runtime_type = "io.containerd.spin-v0-8-0.v1"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.slight-v0-8-0]
runtime_type = "io.containerd.slight-v0-8-0.v1"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.wws-v0-8-0]
runtime_type = "io.containerd.wws-v0-8-0.v1"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.spin-v0-15-1]
runtime_type = "io.containerd.spin.v2"
{{- end}}
{{- if getEnsureNoDupePromiscuousBridge .GetNetworkConfig }}
[plugins."io.containerd.grpc.v1.cri".cni]
bin_dir = "/opt/cni/bin"
conf_dir = "/etc/cni/net.d"
conf_template = "/etc/containerd/kubenet_template.conf"
{{- end}}
{{- if isKubernetesVersionGe .GetKubernetesVersion "1.22.0"}}
[plugins."io.containerd.grpc.v1.cri".registry]
config_path = "/etc/containerd/certs.d"
{{- end}}
[plugins."io.containerd.grpc.v1.cri".registry.headers]
X-Meta-Source-Client = ["azure/aks"]
[metrics]
address = "0.0.0.0:10257"
{{- if .TeleportConfig.GetStatus }}
[proxy_plugins]
[proxy_plugins.teleportd]
type = "snapshot"
address = "/run/teleportd/snapshotter.sock"
{{- end}}
{{- if .GetEnableArtifactStreaming }}
[proxy_plugins]
[proxy_plugins.overlaybd]
type = "snapshot"
address = "/run/overlaybd-snapshotter/overlaybd.sock"
{{- end}}
{{- if .GetIsKata }}
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.katacli]
runtime_type = "io.containerd.runc.v1"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.katacli.options]
NoPivotRoot = false
NoNewKeyring = false
ShimCgroup = ""
IoUid = 0
IoGid = 0
BinaryName = "/usr/bin/kata-runtime"
Root = ""
CriuPath = ""
SystemdCgroup = false
[proxy_plugins]
[proxy_plugins.tardev]
type = "snapshot"
address = "/run/containerd/tardev-snapshotter.sock"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata-cc]
snapshotter = "tardev"
runtime_type = "io.containerd.kata-cc.v2"
privileged_without_host_devices = true
pod_annotations = ["io.katacontainers.*"]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata-cc.options]
ConfigPath = "/opt/confidential-containers/share/defaults/kata-containers/configuration-clh-snp.toml"
{{- end}}
4 changes: 2 additions & 2 deletions aks-node-controller/proto/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ This table is describing the all the AKSNodeConfig Fields converted to .go files
| `BootstrappingConfig` | `BootstrappingConfig` | Bootstrap configuration | `ENABLE_TLS_BOOTSTRAPPING`, `ENABLE_SECURE_TLS_BOOTSTRAPPING`, `CUSTOM_SECURE_TLS_BOOTSTRAP_AAD_SERVER_APP_ID` |
| `AuthConfig` | `AuthConfig` | Authentication configuration | `TENANT_ID`, `SUBSCRIPTION_ID`, `SERVICE_PRINCIPAL_CLIENT_ID`, `SERVICE_PRINCIPAL_FILE_CONTENT`, `USER_ASSIGNED_IDENTITY_ID`, `USE_MANAGED_IDENTITY_EXTENSION` |
| `RuncConfig` | `RuncConfig` | The CLI tool runc configuration | `RUNC_VERSION`, `RUNC_PACKAGE_URL` |
| `ContainerdConfig` | `ContainerdConfig` | Containerd configuration | `CONTAINERD_DOWNLOAD_URL_BASE`, `CONTAINERD_VERSION`, `CONTAINERD_PACKAGE_URL` |
| `ContainerdConfig` | `ContainerdConfig` | Containerd configuration | `CONTAINERD_DOWNLOAD_URL_BASE`, `CONTAINERD_VERSION`, `CONTAINERD_PACKAGE_URL`, `CONTAINERD_CONFIG_CONTENT`, `CONTAINERD_CONFIG_NO_GPU_CONTENT` |
| `TeleportConfig` | `TeleportConfig` | Teleport configuration | `TELEPORT_ENABLED`, `TELEPORTD_PLUGIN_DOWNLOAD_URL` |
| `KubeletConfig` | `KubeletConfig` | Kubelet configuration | `KUBELET_FLAGS`, `KUBELET_NODE_LABELS`, `HAS_KUBELET_DISK_TYPE`, `KUBELET_CONFIG_FILE_ENABLED`, `KUBELET_CONFIG_FILE_CONTENT`, `KUBELET_CLIENT_CONTENT`, `KUBELET_CLIENT_CERT_CONTENT` |
| `CustomSearchDomainConfig` | `CustomSearchDomainConfig` | Custom search domain configuration | `CUSTOM_SEARCH_DOMAIN_NAME`, `CUSTOM_SEARCH_REALM_USER`, `CUSTOM_SEARCH_REALM_PASSWORD` |
Expand Down Expand Up @@ -52,7 +52,7 @@ This table is describing the all the AKSNodeConfig Fields converted to .go files


Removed old environment variables from cse_cmd.sh:
`CSE_HELPERS_FILEPATH`, `CSE_DISTRO_HELPERS_FILEPATH`, `CSE_INSTALL_FILEPATH`, `CSE_DISTRO_INSTALL_FILEPATH`, `CSE_CONFIG_FILEPATH`, `DHCPV6_SERVICE_FILEPATH`, `DHCPV6_CONFIG_FILEPATH`, `NEEDS_DOCKER_LOGIN`, `NEEDS_CONTAINERD`, `CLI_TOOL`, `CONTAINER_RUNTIME`, `MOBY_VERSION`, `HYPERKUBE_URL`, `SGX_NODE` and more.
`CSE_HELPERS_FILEPATH`, `CSE_DISTRO_HELPERS_FILEPATH`, `CSE_INSTALL_FILEPATH`, `CSE_DISTRO_INSTALL_FILEPATH`, `CSE_CONFIG_FILEPATH`, `DHCPV6_SERVICE_FILEPATH`, `DHCPV6_CONFIG_FILEPATH`, `NEEDS_DOCKER_LOGIN`, `NEEDS_CONTAINERD`, `CLI_TOOL`, `CONTAINER_RUNTIME`, `MOBY_VERSION`, `HYPERKUBE_URL`, `SGX_NODE`, `GPU_DRIVER_TYPE` and more.

Many variables are now optional, and a builder function is provided as a helper to supply default values. For example, the builder function defaults `LinuxAdminUsername` to `azureuser` and `OutboundCommand` to the outbound command `curl -v --insecure --proxy-insecure https://mcr.microsoft.com/v2/`.

Expand Down
Loading

0 comments on commit 629a9a1

Please sign in to comment.