diff --git a/swan-cern-system/.helmignore b/swan-cern-system/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/swan-cern-system/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/swan-cern-system/Chart.lock b/swan-cern-system/Chart.lock new file mode 100644 index 00000000..ddab5415 --- /dev/null +++ b/swan-cern-system/Chart.lock @@ -0,0 +1,9 @@ +dependencies: +- name: fluentd + repository: https://fluent.github.io/helm-charts + version: 0.4.3 +- name: gpu-operator + repository: oci://registry.cern.ch/kubernetes/charts + version: v24.6.2 +digest: sha256:c705dbb9ae12eb872da50373da7f29359efbb2da90b230963a66caf144646213 +generated: "2024-10-24T10:49:49.377010232+02:00" diff --git a/swan-cern-system/Chart.yaml b/swan-cern-system/Chart.yaml new file mode 100644 index 00000000..537279dc --- /dev/null +++ b/swan-cern-system/Chart.yaml @@ -0,0 +1,18 @@ +apiVersion: v2 +# +name: swan-cern-system +type: application +# version set by bump2version search/replace +version: 0.0.1 # __swan-cern-system_version__ +appVersion: 0.0.1 +# +description: The chart to deploy SWAN system components at CERN +# +dependencies: + - name: fluentd + version: 0.4.3 + repository: https://fluent.github.io/helm-charts + - name: gpu-operator + version: v24.6.2 + repository: oci://registry.cern.ch/kubernetes/charts + condition: gpu-operator.enabled diff --git a/swan-cern/files/ca-certs/ca-bundle.crt b/swan-cern-system/file/ca-certs/ca-bundle.crt similarity index 100% rename from swan-cern/files/ca-certs/ca-bundle.crt rename to swan-cern-system/file/ca-certs/ca-bundle.crt diff --git a/swan-cern/templates/fluentd/fluentd_ca.yaml b/swan-cern-system/templates/fluentd/fluentd_ca.yaml similarity index 100% rename from swan-cern/templates/fluentd/fluentd_ca.yaml rename to swan-cern-system/templates/fluentd/fluentd_ca.yaml diff --git a/swan-cern/templates/fluentd/fluentd_filters.conf.yaml b/swan-cern-system/templates/fluentd/fluentd_filters.conf.yaml similarity index 100% rename from swan-cern/templates/fluentd/fluentd_filters.conf.yaml rename to swan-cern-system/templates/fluentd/fluentd_filters.conf.yaml diff --git a/swan-cern/templates/fluentd/fluentd_outputs.conf.yaml b/swan-cern-system/templates/fluentd/fluentd_outputs.conf.yaml similarity index 100% rename from swan-cern/templates/fluentd/fluentd_outputs.conf.yaml rename to swan-cern-system/templates/fluentd/fluentd_outputs.conf.yaml diff --git a/swan-cern/templates/fluentd/fluentd_sources.conf.yaml b/swan-cern-system/templates/fluentd/fluentd_sources.conf.yaml similarity index 100% rename from swan-cern/templates/fluentd/fluentd_sources.conf.yaml rename to swan-cern-system/templates/fluentd/fluentd_sources.conf.yaml diff --git a/swan-cern/templates/gpu/kernel-module-params.yaml b/swan-cern-system/templates/gpu/kernel-module-params.yaml similarity index 100% rename from swan-cern/templates/gpu/kernel-module-params.yaml rename to swan-cern-system/templates/gpu/kernel-module-params.yaml diff --git a/swan-cern/templates/gpu/nvidia-dashboard-cm.yaml b/swan-cern-system/templates/gpu/nvidia-dashboard-cm.yaml similarity index 100% rename from swan-cern/templates/gpu/nvidia-dashboard-cm.yaml rename to swan-cern-system/templates/gpu/nvidia-dashboard-cm.yaml diff --git a/swan-cern/templates/gpu/nvidia-dcgm-exporter-config.yaml b/swan-cern-system/templates/gpu/nvidia-dcgm-exporter-config.yaml similarity index 100% rename from swan-cern/templates/gpu/nvidia-dcgm-exporter-config.yaml rename to swan-cern-system/templates/gpu/nvidia-dcgm-exporter-config.yaml diff --git a/swan-cern/templates/gpu/nvidia-dcgm-srvmon.yaml b/swan-cern-system/templates/gpu/nvidia-dcgm-srvmon.yaml similarity index 100% rename from swan-cern/templates/gpu/nvidia-dcgm-srvmon.yaml rename to swan-cern-system/templates/gpu/nvidia-dcgm-srvmon.yaml diff --git a/swan-cern/templates/gpu/nvidia-device-plugin-config.yaml b/swan-cern-system/templates/gpu/nvidia-device-plugin-config.yaml similarity index 100% rename from swan-cern/templates/gpu/nvidia-device-plugin-config.yaml rename to swan-cern-system/templates/gpu/nvidia-device-plugin-config.yaml diff --git a/swan-cern/templates/gpu/nvidia-grid-license.yaml b/swan-cern-system/templates/gpu/nvidia-grid-license.yaml similarity index 100% rename from swan-cern/templates/gpu/nvidia-grid-license.yaml rename to swan-cern-system/templates/gpu/nvidia-grid-license.yaml diff --git a/swan-cern/templates/gpu/nvidia-hack-force-mig-config.yaml b/swan-cern-system/templates/gpu/nvidia-hack-force-mig-config.yaml similarity index 100% rename from swan-cern/templates/gpu/nvidia-hack-force-mig-config.yaml rename to swan-cern-system/templates/gpu/nvidia-hack-force-mig-config.yaml diff --git a/swan-cern/templates/gpu/nvidia-mig-config.yaml b/swan-cern-system/templates/gpu/nvidia-mig-config.yaml similarity index 100% rename from swan-cern/templates/gpu/nvidia-mig-config.yaml rename to swan-cern-system/templates/gpu/nvidia-mig-config.yaml diff --git a/swan-cern/templates/prometheus/rules.yaml b/swan-cern-system/templates/prometheus/rules.yaml similarity index 100% rename from swan-cern/templates/prometheus/rules.yaml rename to swan-cern-system/templates/prometheus/rules.yaml diff --git a/swan-cern-system/values.yaml b/swan-cern-system/values.yaml new file mode 100644 index 00000000..8c969861 --- /dev/null +++ b/swan-cern-system/values.yaml @@ -0,0 +1,119 @@ +fluentd: + caCertPath: &fluentdCaCertPath /etc/ssl/ca-bundle.crt + plugins: + - fluent-plugin-rewrite-tag-filter + - fluent-plugin-out-http + - fluent-plugin-grok-parser + - fluent-plugin-route + containerRuntime: containerd + output: + includeInternal: false + cacert: *fluentdCaCertPath + configMapConfigs: + - fluentd-prometheus-conf # Preserve prometheus config for probes to work + - fluentd-sources-conf + - fluentd-filters-conf + - fluentd-outputs-conf + fileConfigs: + # This is to disable the configuration that comes from upstream + 01_sources.conf: "" + 02_filters.conf: "" + 03_dispatch.conf: "" + 04_outputs.conf: "" + volumeMounts: + - name: ca-certificate + mountPath: *fluentdCaCertPath + subPath: ca-bundle.crt + readOnly: true + volumes: + - name: ca-certificate + configMap: + name: fluentd-ca + items: + - key: ca-bundle.crt + path: ca-bundle.crt + +gpu-operator: + enabled: true + validator: + repository: registry.cern.ch/kubernetes + version: v24.6.2 + operator: + repository: registry.cern.ch/kubernetes + version: v24.6.2 + defaultRuntime: containerd + initContainer: + repository: registry.cern.ch/kubernetes + version: 11.4.2-base-ubi8 + driver: + repository: registry.cern.ch/kubernetes + image: nvidia-gpu-driver + version: "v550.54.15-6.8.4-200.fc39.x86_64" + imagePullPolicy: Always + manager: + repository: registry.cern.ch/kubernetes + version: v0.6.0 + licensingConfig: + configMapName: "nvidia-grid-license" + nlsEnabled: false + nfd: + enabled: false + vgpuManager: + repository: registry.cern.ch + driverManager: + repository: registry.cern.ch + version: v0.5.1 + vfioManager: + repository: registry.cern.ch + version: 11.7.1-base-ubi8 + driverManager: + repository: registry.cern.ch + version: v0.5.1 + gdrcopy: + repository: registry.cern.ch/kubernetes + version: v2.4.1-1 + vgpuDeviceManager: + repository: registry.cern.ch + version: v0.2.0 + toolkit: + repository: registry.cern.ch/kubernetes + version: v1.16.2-ubuntu20.04 + devicePlugin: + repository: registry.cern.ch/kubernetes + version: v0.13.0 + dcgm: + repository: registry.cern.ch + version: 3.1.3-1-ubuntu20.04 + dcgmExporter: + repository: registry.cern.ch/kubernetes + version: 3.1.3-3.1.2-ubuntu20.04 + config: + name: nvidia-dcgm-exporter-metrics + gfd: + repository: registry.cern.ch/kubernetes + version: v0.16.2-ubi8 + mig: + strategy: mixed + migManager: + repository: registry.cern.ch/kubernetes + version: v0.5.0 + config: + name: nvidia-mig-config + nodeStatusExporter: + repository: registry.cern.ch + version: v24.6.2 + kataManager: + repository: registry.cern.ch/kubernetes + version: v0.2.1 + sandboxDevicePlugin: + repository: registry.cern.ch/kubernetes + version: v1.2.1 + ccManager: + repository: registry.cern.ch/kubernetes + version: v0.1.1 + devicePlugin: + repository: registry.cern.ch/kubernetes + version: v0.12.2-ubi8 + config: + name: nvidia-device-plugin-config + default: "default" diff --git a/swan-cern/Chart.lock b/swan-cern/Chart.lock index 10ca103a..ab735e45 100644 --- a/swan-cern/Chart.lock +++ b/swan-cern/Chart.lock @@ -1,12 +1,6 @@ dependencies: - name: swan repository: oci://registry.cern.ch/swan/charts - version: 2.3.8 -- name: fluentd - repository: https://fluent.github.io/helm-charts - version: 0.4.3 -- name: gpu-operator - repository: oci://registry.cern.ch/kubernetes/charts - version: v24.6.2 -digest: sha256:02b128aa07fd796252255ac4f09708fef9b99aabd0d01d4f2c29d28a2e1aca55 -generated: "2024-10-16T10:39:56.250436142+02:00" + version: 2.3.1 +digest: sha256:29975b17c609f14239268ceb400c12d076fa4700acff018a8f56abc0b4314da7 +generated: "2024-09-16T14:32:44.292303568+02:00" diff --git a/swan-cern/Chart.yaml b/swan-cern/Chart.yaml index 6b369467..894a1c1f 100644 --- a/swan-cern/Chart.yaml +++ b/swan-cern/Chart.yaml @@ -13,10 +13,3 @@ dependencies: version: 2.3.8 repository: oci://registry.cern.ch/swan/charts condition: swan.enabled - - name: fluentd - version: 0.4.3 - repository: https://fluent.github.io/helm-charts - - name: gpu-operator - version: v24.6.2 - repository: oci://registry.cern.ch/kubernetes/charts - condition: gpu-operator.enabled diff --git a/swan-cern/values.yaml b/swan-cern/values.yaml index 22b30e13..c5e9cc35 100644 --- a/swan-cern/values.yaml +++ b/swan-cern/values.yaml @@ -237,123 +237,3 @@ swanCern: hadoopTokenGenerator: image: gitlab-registry.cern.ch/swan/docker-images/hadoop-token-generator:v3.0.4 - -fluentd: - caCertPath: &fluentdCaCertPath /etc/ssl/ca-bundle.crt - plugins: - - fluent-plugin-rewrite-tag-filter - - fluent-plugin-out-http - - fluent-plugin-grok-parser - - fluent-plugin-route - containerRuntime: containerd - output: - includeInternal: false - cacert: *fluentdCaCertPath - configMapConfigs: - - fluentd-prometheus-conf # Preserve prometheus config for probes to work - - fluentd-sources-conf - - fluentd-filters-conf - - fluentd-outputs-conf - fileConfigs: - # This is to disable the configuration that comes from upstream - 01_sources.conf: "" - 02_filters.conf: "" - 03_dispatch.conf: "" - 04_outputs.conf: "" - volumeMounts: - - name: ca-certificate - mountPath: *fluentdCaCertPath - subPath: ca-bundle.crt - readOnly: true - volumes: - - name: ca-certificate - configMap: - name: fluentd-ca - items: - - key: ca-bundle.crt - path: ca-bundle.crt - -gpu-operator: - enabled: true - validator: - repository: registry.cern.ch/kubernetes - version: v24.6.2 - operator: - repository: registry.cern.ch/kubernetes - version: v24.6.2 - defaultRuntime: containerd - initContainer: - repository: registry.cern.ch/kubernetes - version: 11.4.2-base-ubi8 - driver: - repository: registry.cern.ch/kubernetes - image: nvidia-gpu-driver - version: "v550.54.15-6.8.4-200.fc39.x86_64" - imagePullPolicy: Always - manager: - repository: registry.cern.ch/kubernetes - version: v0.6.0 - licensingConfig: - configMapName: "nvidia-grid-license" - nlsEnabled: false - nfd: - enabled: false - vgpuManager: - repository: registry.cern.ch - driverManager: - repository: registry.cern.ch - version: v0.5.1 - vfioManager: - repository: registry.cern.ch - version: 11.7.1-base-ubi8 - driverManager: - repository: registry.cern.ch - version: v0.5.1 - gdrcopy: - repository: registry.cern.ch/kubernetes - version: v2.4.1-1 - vgpuDeviceManager: - repository: registry.cern.ch - version: v0.2.0 - toolkit: - repository: registry.cern.ch/kubernetes - version: v1.16.2-ubuntu20.04 - devicePlugin: - repository: registry.cern.ch/kubernetes - version: v0.13.0 - dcgm: - repository: registry.cern.ch - version: 3.1.3-1-ubuntu20.04 - dcgmExporter: - repository: registry.cern.ch/kubernetes - version: 3.1.3-3.1.2-ubuntu20.04 - config: - name: nvidia-dcgm-exporter-metrics - gfd: - repository: registry.cern.ch/kubernetes - version: v0.16.2-ubi8 - mig: - strategy: mixed - migManager: - repository: registry.cern.ch/kubernetes - version: v0.5.0 - config: - name: nvidia-mig-config - nodeStatusExporter: - repository: registry.cern.ch - version: v24.6.2 - kataManager: - repository: registry.cern.ch/kubernetes - version: v0.2.1 - sandboxDevicePlugin: - repository: registry.cern.ch/kubernetes - version: v1.2.1 - ccManager: - repository: registry.cern.ch/kubernetes - version: v0.1.1 - devicePlugin: - repository: registry.cern.ch/kubernetes - version: v0.12.2-ubi8 - config: - name: nvidia-device-plugin-config - default: "default"