Skip to content

Commit

Permalink
swan-cern-system: Move system dependencies to new helm chart
Browse files Browse the repository at this point in the history
This way, gpu-operator and fluentd can belong to a new chart and
be deployed independently of the swan-cern chart.

The swan-cern chart will then be able to be deployed twice in the
same cluster, in two namespaces, since all the remaining components
can be duplicated and coexist in the same cluster (e.g. eos, cvmfs).
  • Loading branch information
PMax5 authored and etejedor committed Oct 29, 2024
1 parent d982c0c commit ee67250
Show file tree
Hide file tree
Showing 21 changed files with 172 additions and 136 deletions.
23 changes: 23 additions & 0 deletions swan-cern-system/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
# No negation patterns are used below, so the order of entries is not significant.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs (project and workspace metadata)
.project
.idea/
*.tmproj
.vscode/
9 changes: 9 additions & 0 deletions swan-cern-system/Chart.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Locked dependency versions for the swan-cern-system chart.
# Each entry pins one dependency by name, repository and version.
dependencies:
- name: fluentd
  repository: https://fluent.github.io/helm-charts
  version: 0.4.3
- name: gpu-operator
  repository: oci://registry.cern.ch/kubernetes/charts
  version: v24.6.2
digest: sha256:c705dbb9ae12eb872da50373da7f29359efbb2da90b230963a66caf144646213
generated: "2024-10-24T10:49:49.377010232+02:00"
18 changes: 18 additions & 0 deletions swan-cern-system/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
apiVersion: v2
#
name: swan-cern-system
type: application
# version set by bump2version search/replace
# (keep the marker comment on the next line exactly as written)
version: 0.0.1 # __swan-cern-system_version__
appVersion: 0.0.1
#
description: The chart to deploy SWAN system components at CERN
#
# Cluster-wide components packaged here so they are deployed independently
# of the swan-cern chart.
dependencies:
  - name: fluentd
    version: 0.4.3
    repository: https://fluent.github.io/helm-charts
  - name: gpu-operator
    version: v24.6.2
    repository: oci://registry.cern.ch/kubernetes/charts
    # Only pulled in when the `gpu-operator.enabled` value is true.
    condition: gpu-operator.enabled
File renamed without changes.
File renamed without changes.
File renamed without changes.
119 changes: 119 additions & 0 deletions swan-cern-system/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Values passed to the fluentd dependency chart.
fluentd:
  # Path of the CA bundle inside the container; anchored so the output
  # setting and the volume mount below always agree.
  caCertPath: &fluentdCaCertPath /etc/ssl/ca-bundle.crt
  plugins:
    - fluent-plugin-rewrite-tag-filter
    - fluent-plugin-out-http
    - fluent-plugin-grok-parser
    - fluent-plugin-route
  containerRuntime: containerd
  output:
    includeInternal: false
    cacert: *fluentdCaCertPath
  configMapConfigs:
    - fluentd-prometheus-conf  # Preserve prometheus config for probes to work
    - fluentd-sources-conf
    - fluentd-filters-conf
    - fluentd-outputs-conf
  fileConfigs:
    # This is to disable the configuration that comes from upstream
    01_sources.conf: ""
    02_filters.conf: ""
    03_dispatch.conf: ""
    04_outputs.conf: ""
  # Mount only the ca-bundle.crt key of the fluentd-ca ConfigMap, read-only,
  # at the anchored CA path.
  volumeMounts:
    - name: ca-certificate
      mountPath: *fluentdCaCertPath
      subPath: ca-bundle.crt
      readOnly: true
  volumes:
    - name: ca-certificate
      configMap:
        name: fluentd-ca
        items:
          - key: ca-bundle.crt
            path: ca-bundle.crt

# Values passed to the gpu-operator dependency chart. Component images are
# taken from registry.cern.ch instead of the chart's default registries.
gpu-operator:
  # Also gates the dependency itself (Chart.yaml condition: gpu-operator.enabled).
  enabled: true
  validator:
    repository: registry.cern.ch/kubernetes
    version: v24.6.2
  operator:
    repository: registry.cern.ch/kubernetes
    version: v24.6.2
    defaultRuntime: containerd
    initContainer:
      repository: registry.cern.ch/kubernetes
      version: 11.4.2-base-ubi8
  driver:
    repository: registry.cern.ch/kubernetes
    image: nvidia-gpu-driver
    version: "v550.54.15-6.8.4-200.fc39.x86_64"
    imagePullPolicy: Always
    manager:
      repository: registry.cern.ch/kubernetes
      version: v0.6.0
    licensingConfig:
      configMapName: "nvidia-grid-license"
      nlsEnabled: false
  nfd:
    enabled: false
  vgpuManager:
    repository: registry.cern.ch
    driverManager:
      repository: registry.cern.ch
      version: v0.5.1
  vfioManager:
    repository: registry.cern.ch
    version: 11.7.1-base-ubi8
    driverManager:
      repository: registry.cern.ch
      version: v0.5.1
  gdrcopy:
    repository: registry.cern.ch/kubernetes
    version: v2.4.1-1
  vgpuDeviceManager:
    repository: registry.cern.ch
    version: v0.2.0
  toolkit:
    repository: registry.cern.ch/kubernetes
    version: v1.16.2-ubuntu20.04
  dcgm:
    repository: registry.cern.ch
    version: 3.1.3-1-ubuntu20.04
  dcgmExporter:
    repository: registry.cern.ch/kubernetes
    version: 3.1.3-3.1.2-ubuntu20.04
    config:
      name: nvidia-dcgm-exporter-metrics
  gfd:
    repository: registry.cern.ch/kubernetes
    version: v0.16.2-ubi8
  mig:
    strategy: mixed
  migManager:
    repository: registry.cern.ch/kubernetes
    version: v0.5.0
    config:
      name: nvidia-mig-config
  nodeStatusExporter:
    repository: registry.cern.ch
    version: v24.6.2
  kataManager:
    repository: registry.cern.ch/kubernetes
    version: v0.2.1
  sandboxDevicePlugin:
    repository: registry.cern.ch/kubernetes
    version: v1.2.1
  ccManager:
    repository: registry.cern.ch/kubernetes
    version: v0.1.1
  # NOTE(review): `devicePlugin` was declared twice in this mapping. An earlier
  # entry (same repository, version v0.13.0, no config) sat between `toolkit`
  # and `dcgm`. Duplicate keys are invalid YAML and most parsers silently keep
  # the last one, so only this entry is retained. TODO confirm whether
  # v0.12.2-ubi8 or v0.13.0 is the intended device-plugin version.
  devicePlugin:
    repository: registry.cern.ch/kubernetes
    version: v0.12.2-ubi8
    config:
      name: nvidia-device-plugin-config
      default: "default"
12 changes: 3 additions & 9 deletions swan-cern/Chart.lock
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
# NOTE(review): this hunk interleaves removed and added lines (3 additions,
# 9 deletions). Assuming the 2.3.1 version and the 2024-09-16 digest/timestamp
# are the added side, the lock would pin swan 2.3.1 while swan-cern/Chart.yaml
# still requests 2.3.8. TODO confirm, and regenerate the lock with
# `helm dependency update` if so.
dependencies:
- name: swan
repository: oci://registry.cern.ch/swan/charts
version: 2.3.8
- name: fluentd
repository: https://fluent.github.io/helm-charts
version: 0.4.3
- name: gpu-operator
repository: oci://registry.cern.ch/kubernetes/charts
version: v24.6.2
digest: sha256:02b128aa07fd796252255ac4f09708fef9b99aabd0d01d4f2c29d28a2e1aca55
generated: "2024-10-16T10:39:56.250436142+02:00"
version: 2.3.1
digest: sha256:29975b17c609f14239268ceb400c12d076fa4700acff018a8f56abc0b4314da7
generated: "2024-09-16T14:32:44.292303568+02:00"
7 changes: 0 additions & 7 deletions swan-cern/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,3 @@ dependencies:
version: 2.3.8
repository: oci://registry.cern.ch/swan/charts
condition: swan.enabled
- name: fluentd
version: 0.4.3
repository: https://fluent.github.io/helm-charts
- name: gpu-operator
version: v24.6.2
repository: oci://registry.cern.ch/kubernetes/charts
condition: gpu-operator.enabled
120 changes: 0 additions & 120 deletions swan-cern/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -237,123 +237,3 @@ swanCern:

hadoopTokenGenerator:
image: gitlab-registry.cern.ch/swan/docker-images/hadoop-token-generator:v3.0.4

fluentd:
caCertPath: &fluentdCaCertPath /etc/ssl/ca-bundle.crt
plugins:
- fluent-plugin-rewrite-tag-filter
- fluent-plugin-out-http
- fluent-plugin-grok-parser
- fluent-plugin-route
containerRuntime: containerd
output:
includeInternal: false
cacert: *fluentdCaCertPath
configMapConfigs:
- fluentd-prometheus-conf # Preserve prometheus config for probes to work
- fluentd-sources-conf
- fluentd-filters-conf
- fluentd-outputs-conf
fileConfigs:
# This is to disable the configuration that comes from upstream
01_sources.conf: ""
02_filters.conf: ""
03_dispatch.conf: ""
04_outputs.conf: ""
volumeMounts:
- name: ca-certificate
mountPath: *fluentdCaCertPath
subPath: ca-bundle.crt
readOnly: true
volumes:
- name: ca-certificate
configMap:
name: fluentd-ca
items:
- key: ca-bundle.crt
path: ca-bundle.crt

gpu-operator:
enabled: true
validator:
repository: registry.cern.ch/kubernetes
version: v24.6.2
operator:
repository: registry.cern.ch/kubernetes
version: v24.6.2
defaultRuntime: containerd
initContainer:
repository: registry.cern.ch/kubernetes
version: 11.4.2-base-ubi8
driver:
repository: registry.cern.ch/kubernetes
image: nvidia-gpu-driver
version: "v550.54.15-6.8.4-200.fc39.x86_64"
imagePullPolicy: Always
manager:
repository: registry.cern.ch/kubernetes
version: v0.6.0
licensingConfig:
configMapName: "nvidia-grid-license"
nlsEnabled: false
nfd:
enabled: false
vgpuManager:
repository: registry.cern.ch
driverManager:
repository: registry.cern.ch
version: v0.5.1
vfioManager:
repository: registry.cern.ch
version: 11.7.1-base-ubi8
driverManager:
repository: registry.cern.ch
version: v0.5.1
gdrcopy:
repository: registry.cern.ch/kubernetes
version: v2.4.1-1
vgpuDeviceManager:
repository: registry.cern.ch
version: v0.2.0
toolkit:
repository: registry.cern.ch/kubernetes
version: v1.16.2-ubuntu20.04
devicePlugin:
repository: registry.cern.ch/kubernetes
version: v0.13.0
dcgm:
repository: registry.cern.ch
version: 3.1.3-1-ubuntu20.04
dcgmExporter:
repository: registry.cern.ch/kubernetes
version: 3.1.3-3.1.2-ubuntu20.04
config:
name: nvidia-dcgm-exporter-metrics
gfd:
repository: registry.cern.ch/kubernetes
version: v0.16.2-ubi8
mig:
strategy: mixed
migManager:
repository: registry.cern.ch/kubernetes
version: v0.5.0
config:
name: nvidia-mig-config
nodeStatusExporter:
repository: registry.cern.ch
version: v24.6.2
kataManager:
repository: registry.cern.ch/kubernetes
version: v0.2.1
sandboxDevicePlugin:
repository: registry.cern.ch/kubernetes
version: v1.2.1
ccManager:
repository: registry.cern.ch/kubernetes
version: v0.1.1
devicePlugin:
repository: registry.cern.ch/kubernetes
version: v0.12.2-ubi8
config:
name: nvidia-device-plugin-config
default: "default"

0 comments on commit ee67250

Please sign in to comment.