From e9c2b0a863da4f1c431b1793d856255255094fee Mon Sep 17 00:00:00 2001 From: vsoch Date: Wed, 20 Sep 2023 19:10:04 -0600 Subject: [PATCH] add back fio Signed-off-by: vsoch --- .github/workflows/main.yaml | 2 +- README.md | 1 - controllers/metric/metric_controller.go | 3 +- docs/_static/data/metrics.json | 13 +- docs/getting_started/metrics.md | 9 +- examples/tests/io-fio/metrics.yaml | 16 +-- examples/tests/io-fio/post-run.sh | 2 +- hack/docs-gen/main.go | 2 +- main.go | 2 +- pkg/addons/addons.go | 13 +- pkg/addons/containers.go | 6 - pkg/addons/hpctoolkit.go | 6 +- pkg/addons/volumes.go | 92 ++++++------- pkg/metrics/app/bdas.go | 4 +- pkg/metrics/app/hpl.go | 4 +- pkg/metrics/app/kripke.go | 9 +- pkg/metrics/app/laghos.go | 4 +- pkg/metrics/app/lammps.go | 9 +- pkg/metrics/app/ldms.go | 4 +- pkg/metrics/app/nekbone.go | 3 +- pkg/metrics/app/pennant.go | 9 +- pkg/metrics/app/quicksilver.go | 9 +- pkg/metrics/application.go | 1 + pkg/metrics/base.go | 52 ++++++-- pkg/metrics/io/fio.go | 165 ++++++++++++++++++++++++ pkg/metrics/launcher.go | 26 ---- pkg/metrics/metrics.go | 5 +- pkg/metrics/storage.go | 31 +++++ pkg/metrics/volumes.go | 2 + 29 files changed, 355 insertions(+), 149 deletions(-) create mode 100644 pkg/metrics/io/fio.go diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 1a916a0..3ad6b5f 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -69,7 +69,7 @@ jobs: test: [["app-lammps", "ghcr.io/converged-computing/metric-lammps:latest", 120], #[["perf-hello-world", "ghcr.io/converged-computing/metric-sysstat:latest", 60], #["io-host-volume", "ghcr.io/converged-computing/metric-sysstat:latest", 60], - #["io-fio", "ghcr.io/converged-computing/metric-fio:latest", 120], + ["io-fio", "ghcr.io/converged-computing/metric-fio:latest", 120], #["io-ior", "ghcr.io/converged-computing/metric-ior:latest", 120], ## ["network-chatterbug", "ghcr.io/converged-computing/metric-chatterbug:latest", 120], ["app-nekbone", "ghcr.io/converged-computing/metric-nekbone:latest", 120], diff --git a/README.md b/README.md index 4d37b8a..9cc0420 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,6 @@ To learn more: - Document and automate docs for addons (options, etc.) - Addons likely needs to be a list to support > 1 of one type! Then subsequent changes so it's not 1:1 -- Is there any reason we cannot generate the names for the addon volumes? - We need a way for the entrypoint command to monitor (based on the container) to differ (potentially) - For larger metric collections, we should have a log streaming mode (and not wait for Completed/Successful) diff --git a/controllers/metric/metric_controller.go b/controllers/metric/metric_controller.go index 00327d9..3ed6995 100644 --- a/controllers/metric/metric_controller.go +++ b/controllers/metric/metric_controller.go @@ -108,7 +108,8 @@ func (r *MetricSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( set := mctrl.MetricSet{} for _, metric := range spec.Spec.Metrics { - // Get the individual metric, the type will determine the set we add it to + // Get the individual metric + r.Log.Info(fmt.Sprintf("🟦️ Looking for metric %s\n", metric.Name)) m, err := mctrl.GetMetric(&metric, &spec) if err != nil { r.Log.Error(err, fmt.Sprintf("🟥️ We had an issue loading that metric %s!", metric.Name)) diff --git a/docs/_static/data/metrics.json b/docs/_static/data/metrics.json index 0e47ebc..31b8082 100644 --- a/docs/_static/data/metrics.json +++ b/docs/_static/data/metrics.json @@ -1,9 +1,9 @@ [ { - "name": "app-amg", - "description": "parallel algebraic multigrid solver for linear systems arising from problems on unstructured grids", + "name": "", + "description": "", "family": "solver", - "image": "ghcr.io/converged-computing/metric-amg:latest", + "image": "", "url": "https://github.com/LLNL/AMG" }, { @@ -68,5 +68,12 @@ "family": "simulation", "image": "ghcr.io/converged-computing/metric-quicksilver:latest", "url": "https://github.com/LLNL/Quicksilver" + }, + { + "name": "io-fio", + "description": "Flexible IO Tester (FIO)", + "family": "storage", + "image": "ghcr.io/converged-computing/metric-fio:latest", + "url": "https://fio.readthedocs.io/en/latest/fio_doc.html" } ] \ No newline at end of file diff --git a/docs/getting_started/metrics.md b/docs/getting_started/metrics.md index 7bedc40..63e67ef 100644 --- a/docs/getting_started/metrics.md +++ b/docs/getting_started/metrics.md @@ -128,13 +128,16 @@ Options you can set include: |Name | Description | Type | Default | |-----|-------------|------------|------| -|testname | Name for the test | string | test | +| testname | Name for the test | string | test | | blocksize | Size of block to write. It defaults to 4k, but can be set from 256 to 8k. | string | 4k | | iodepth | Number of I/O units to keep in flight against the file. | int | 64 | | size | Total size of file to write | string | 4G | | directory | Directory (usually mounted) to test. | string | /tmp | +| pre | Custom logic / command to run before Fio | string | unset | +| post | Custom logic / command to run after Fio (e.g., cleanup) | string | unset | +| prefix | Prefix to add to running fio commands (like a wrapper) | string | unset | -For the last "directory" we use this location to write a temporary file, which will be cleaned up. +For the "directory" we use this location to write a temporary file, which will be cleaned up. This allows for testing storage mounted from multiple metric pods without worrying about a name conflict. #### io-ior @@ -205,7 +208,7 @@ Variables to customize include: |Name | Description | Option Key | Type | Default | |-----|-------------|------------|------|---------| | commands | Custom list of osu-benchmark one-sided commands to run | listOptions->commands | array | unset uses default set | -| sole-tenancy | Turn off sole tenancy (one pod/node) | string ("false" or "no") | "true" | +| soleTenancy | Turn off sole tenancy (one pod/node) | string ("false" or "no") | "true" | | all | Run ALL benchmarks with defaults | string ("true" or "yes") | "false" | | flags | Overwrite defaults flags (experts only!)| string | Defaults to an ideal set per metric (see [osu-benchmark.go](https://github.com/converged-computing/metrics-operator/blob/main/pkg/metrics/network/osu-benchmark.go))| | timed | String "true" or "yes" to add time prefix to mpirun (for debugging, etc) | string | "false" | diff --git a/examples/tests/io-fio/metrics.yaml b/examples/tests/io-fio/metrics.yaml index 80b7181..252a702 100644 --- a/examples/tests/io-fio/metrics.yaml +++ b/examples/tests/io-fio/metrics.yaml @@ -6,19 +6,17 @@ metadata: app.kubernetes.io/instance: metricset-sample name: metricset-sample spec: - storage: - volume: - # This is the path on the host (e.g., inside kind container) - hostPath: /tmp/workflow - - # This is the path in the container - path: /tmp/workflow - metrics: - # Fio just runs once - no concept of completions / rate - name: io-fio options: size: 1M blocksize: 1K directory: /tmp/workflow + # Fio usually will have a volume as an addon, let's do hostpath here + addons: + - name: volume-hostpath + options: + name: fio-mount + hostPath: /tmp/workflow + path: /tmp/workflow \ No newline at end of file diff --git a/examples/tests/io-fio/post-run.sh b/examples/tests/io-fio/post-run.sh index f9c0beb..4d9d000 100644 --- a/examples/tests/io-fio/post-run.sh +++ b/examples/tests/io-fio/post-run.sh @@ -1,4 +1,4 @@ #!/bin/bash echo "Cleaning up /tmp/workflow in minikube" -minikube ssh -- sudo rm -rf /tmp/workflow +minikube ssh -- sudo rm -rf /tmp/workflow \ No newline at end of file diff --git a/hack/docs-gen/main.go b/hack/docs-gen/main.go index f46eb2e..d213836 100644 --- a/hack/docs-gen/main.go +++ b/hack/docs-gen/main.go @@ -9,7 +9,7 @@ import ( // Metrics are registered here! Importing registers once "github.com/converged-computing/metrics-operator/pkg/metrics" _ "github.com/converged-computing/metrics-operator/pkg/metrics/app" - // _ "github.com/converged-computing/metrics-operator/pkg/metrics/io" + _ "github.com/converged-computing/metrics-operator/pkg/metrics/io" // _ "github.com/converged-computing/metrics-operator/pkg/metrics/network" // _ "github.com/converged-computing/metrics-operator/pkg/metrics/perf" // diff --git a/main.go b/main.go index 86bf409..6b54cc9 100644 --- a/main.go +++ b/main.go @@ -32,7 +32,7 @@ import ( // Metrics are registered here! Importing registers once _ "github.com/converged-computing/metrics-operator/pkg/metrics/app" - // _ "github.com/converged-computing/metrics-operator/pkg/metrics/io" + _ "github.com/converged-computing/metrics-operator/pkg/metrics/io" // _ "github.com/converged-computing/metrics-operator/pkg/metrics/network" // _ "github.com/converged-computing/metrics-operator/pkg/metrics/perf" // diff --git a/pkg/addons/addons.go b/pkg/addons/addons.go index 1c06a02..369957c 100644 --- a/pkg/addons/addons.go +++ b/pkg/addons/addons.go @@ -11,7 +11,6 @@ import ( "fmt" "log" - corev1 "k8s.io/api/core/v1" jobset "sigs.k8s.io/jobset/api/jobset/v1alpha2" api "github.com/converged-computing/metrics-operator/api/v1alpha1" @@ -39,8 +38,7 @@ type Addon interface { MapOptions() map[string]map[string]intstr.IntOrString // What addons can control: - GetVolumes() []corev1.Volume - AssembleVolumes() []specs.VolumeSpec + AssembleVolumes() specs.VolumeSpec AssembleContainers() []specs.ContainerSpec CustomizeEntrypoints([]*specs.ContainerSpec, []*jobset.ReplicatedJob) @@ -67,14 +65,13 @@ func (b AddonBase) CustomizeEntrypoints([]*specs.ContainerSpec, []*jobset.Replic func (b AddonBase) Validate() bool { return true } -func (b AddonBase) GetVolumes() []corev1.Volume { - return []corev1.Volume{} -} func (b AddonBase) AssembleContainers() []specs.ContainerSpec { return []specs.ContainerSpec{} } -func (b AddonBase) AssembleVolumes() []specs.VolumeSpec { - return []specs.VolumeSpec{} + +// Assemble Volumes (for now) just generates one +func (b AddonBase) AssembleVolumes() specs.VolumeSpec { + return specs.VolumeSpec{} } func (b AddonBase) Description() string { diff --git a/pkg/addons/containers.go b/pkg/addons/containers.go index 8f1d520..7a73c9a 100644 --- a/pkg/addons/containers.go +++ b/pkg/addons/containers.go @@ -11,7 +11,6 @@ import ( "fmt" api "github.com/converged-computing/metrics-operator/api/v1alpha1" - "github.com/converged-computing/metrics-operator/pkg/specs" "k8s.io/apimachinery/pkg/util/intstr" ) @@ -151,11 +150,6 @@ func (a *ApplicationAddon) MapOptions() map[string]map[string]intstr.IntOrString } } -// AssembleVolumes for an application -func (a *ApplicationAddon) AssembleVolumes() []specs.VolumeSpec { - return []specs.VolumeSpec{} -} - func init() { // Config map volume type diff --git a/pkg/addons/hpctoolkit.go b/pkg/addons/hpctoolkit.go index 3d8a068..08bde58 100644 --- a/pkg/addons/hpctoolkit.go +++ b/pkg/addons/hpctoolkit.go @@ -32,7 +32,7 @@ type HPCToolkit struct { } // AssembleVolumes to provide an empty volume for the application to share -func (m HPCToolkit) AssembleVolumes() []specs.VolumeSpec { +func (m HPCToolkit) AssembleVolumes() specs.VolumeSpec { volume := corev1.Volume{ Name: "hpctoolkit", VolumeSource: corev1.VolumeSource{ @@ -40,10 +40,10 @@ func (m HPCToolkit) AssembleVolumes() []specs.VolumeSpec { }, } // EmptyDir should be ReadOnly False, and we don't need a mount for it - return []specs.VolumeSpec{{ + return specs.VolumeSpec{ Volume: volume, Mount: false, - }} + } } // Validate we have an executable provided, and args and optional diff --git a/pkg/addons/volumes.go b/pkg/addons/volumes.go index 0ea3d06..77dbb8d 100644 --- a/pkg/addons/volumes.go +++ b/pkg/addons/volumes.go @@ -8,6 +8,8 @@ SPDX-License-Identifier: MIT package addons import ( + "fmt" + "math/rand" "path/filepath" corev1 "k8s.io/api/core/v1" @@ -25,17 +27,25 @@ type VolumeBase struct { } func (v *VolumeBase) DefaultValidate() bool { + + // We require the user to provide a name to ensure they enforce uniqueness if v.name == "" { - logger.Error("All volume addons require a 'name' for reference.") + logger.Error("🟥️ All volume addons require a 'name' for a unique container mount.") return false } if v.path == "" { - logger.Error("All volume addons require a 'path' for the container mount.") + logger.Error("🟥️ All volume addons require a 'path' for the container mount.") return false } return true } +// If not provided, generate a name for the volume +func (v *VolumeBase) generateName() string { + number := rand.Intn(10000) + return fmt.Sprintf("%s-%d", v.name, number) +} + // DefaultSetOptions across volume types for shared attributes func (v *VolumeBase) DefaultSetOptions(metric *api.MetricAddon) { @@ -64,10 +74,6 @@ type ConfigMapVolume struct { // The metrics operator does not create it for you! configMapName string - // name and path of the volume - name string - path string - // Items (key and paths) for the config map items map[string]string } @@ -75,11 +81,11 @@ type ConfigMapVolume struct { // Validate we have an executable provided, and args and optional func (v *ConfigMapVolume) Validate() bool { if v.configMapName == "" { - logger.Error("The volume-cm volume addon requires a 'configMapName' for the existing config map.") + logger.Error("🟥️ The volume-cm volume addon requires a 'configMapName' for the existing config map.") return false } if len(v.items) == 0 { - logger.Error("The volume-cm volume addon requires at least one entry in mapOptions->items, with key value pairs.") + logger.Error("🟥️ The volume-cm volume addon requires at least one entry in mapOptions->items, with key value pairs.") return false } return v.DefaultValidate() @@ -127,7 +133,7 @@ func (v *ConfigMapVolume) MapOptions() map[string]map[string]intstr.IntOrString } // AssembleVolumes for a config map -func (v *ConfigMapVolume) AssembleVolumes() []specs.VolumeSpec { +func (v *ConfigMapVolume) AssembleVolumes() specs.VolumeSpec { // Prepare items as key to path items := []corev1.KeyToPath{} @@ -153,12 +159,12 @@ func (v *ConfigMapVolume) AssembleVolumes() []specs.VolumeSpec { } // ConfigMaps have to be read only! - return []specs.VolumeSpec{{ + return specs.VolumeSpec{ Volume: newVolume, Path: filepath.Dir(v.path), ReadOnly: true, Mount: true, - }} + } } // An existing peristent volume claim @@ -166,15 +172,13 @@ type PersistentVolumeClaim struct { VolumeBase // Path and claim name are always required if a secret isn't defined - name string claimName string - path string } // Validate we have an executable provided, and args and optional func (v *PersistentVolumeClaim) Validate() bool { if v.claimName == "" { - logger.Error("The volume-pvc volume addon requires a 'claimName' for the existing persistent volume claim (pvc).") + logger.Error("🟥️ The volume-pvc volume addon requires a 'claimName' for the existing persistent volume claim (pvc).") return false } return v.DefaultValidate() @@ -190,7 +194,7 @@ func (v *PersistentVolumeClaim) SetOptions(metric *api.MetricAddon) { } // AssembleVolumes for a pvc -func (v *PersistentVolumeClaim) AssembleVolumes() []specs.VolumeSpec { +func (v *PersistentVolumeClaim) AssembleVolumes() specs.VolumeSpec { volume := corev1.Volume{ Name: v.name, VolumeSource: corev1.VolumeSource{ @@ -201,12 +205,12 @@ func (v *PersistentVolumeClaim) AssembleVolumes() []specs.VolumeSpec { } // ConfigMaps have to be read only! - return []specs.VolumeSpec{{ + return specs.VolumeSpec{ Volume: volume, Path: filepath.Dir(v.path), ReadOnly: v.readOnly, Mount: true, - }} + } } // An existing secret @@ -215,14 +219,12 @@ type SecretVolume struct { // secret name is required secretName string - name string - path string } // Validate we have an executable provided, and args and optional func (v *SecretVolume) Validate() bool { if v.secretName == "" { - logger.Error("The volume-secret addon requires a 'secretName' for the existing secret.") + logger.Error("🟥️ The volume-secret addon requires a 'secretName' for the existing secret.") return false } return v.DefaultValidate() @@ -238,7 +240,7 @@ func (v *SecretVolume) SetOptions(metric *api.MetricAddon) { } // AssembleVolumes for a Secret -func (v *SecretVolume) AssembleVolumes() []specs.VolumeSpec { +func (v *SecretVolume) AssembleVolumes() specs.VolumeSpec { volume := corev1.Volume{ Name: v.name, VolumeSource: corev1.VolumeSource{ @@ -247,12 +249,12 @@ func (v *SecretVolume) AssembleVolumes() []specs.VolumeSpec { }, }, } - return []specs.VolumeSpec{{ + return specs.VolumeSpec{ Volume: volume, ReadOnly: v.readOnly, Path: v.path, Mount: true, - }} + } } // A hostPath volume @@ -261,16 +263,12 @@ type HostPathVolume struct { // only the hostpath and name are required hostPath string - - // Path in container - path string - name string } // Validate we have an executable provided, and args and optional func (v *HostPathVolume) Validate() bool { if v.hostPath == "" { - logger.Error("The volume-hostpath addon requires a 'hostPath' for the host path.") + logger.Error("🟥️ The volume-hostpath addon requires a 'hostPath' for the host path.") return false } return v.DefaultValidate() @@ -284,19 +282,11 @@ func (v *HostPathVolume) SetOptions(metric *api.MetricAddon) { if ok { v.hostPath = path.StrVal } - path, ok = metric.Options["path"] - if ok { - v.path = path.StrVal - } - - name, ok := metric.Options["name"] - if ok { - v.name = name.StrVal - } + v.DefaultSetOptions(metric) } // AssembleVolumes for a host volume -func (v *HostPathVolume) AssembleVolumes() []specs.VolumeSpec { +func (v *HostPathVolume) AssembleVolumes() specs.VolumeSpec { volume := corev1.Volume{ Name: v.name, VolumeSource: corev1.VolumeSource{ @@ -305,27 +295,21 @@ func (v *HostPathVolume) AssembleVolumes() []specs.VolumeSpec { }, }, } - return []specs.VolumeSpec{{ + return specs.VolumeSpec{ Volume: volume, Mount: true, Path: v.path, ReadOnly: v.readOnly, - }} + } } // An empty volume requires nothing! Nice! type EmptyVolume struct { VolumeBase - name string - path string } // Validate we have an executable provided, and args and optional func (v *EmptyVolume) Validate() bool { - if v.name == "" { - logger.Error("The volume-empty addon requires a 'name'.") - return false - } return v.DefaultValidate() } @@ -338,20 +322,19 @@ func (v *EmptyVolume) SetOptions(metric *api.MetricAddon) { } // AssembleVolumes for an empty volume -func (v *EmptyVolume) AssembleVolumes() []specs.VolumeSpec { +func (v *EmptyVolume) AssembleVolumes() specs.VolumeSpec { volume := corev1.Volume{ Name: v.name, VolumeSource: corev1.VolumeSource{ EmptyDir: &corev1.EmptyDirVolumeSource{}, }, } - return []specs.VolumeSpec{{ + return specs.VolumeSpec{ Volume: volume, Mount: true, Path: v.path, ReadOnly: v.readOnly, - }} - + } } // TODO likely we need to carry around entrypoints to customize? @@ -376,6 +359,15 @@ func init() { secretVol := SecretVolume{VolumeBase: volBase} Register(&secretVol) + // Hostpath volume type + base = AddonBase{ + Identifier: "volume-hostpath", + Summary: "host path volume type", + } + volBase = VolumeBase{AddonBase: base} + hostVol := HostPathVolume{VolumeBase: volBase} + Register(&hostVol) + // persistent volume claim volume type base = AddonBase{ Identifier: "volume-pvc", diff --git a/pkg/metrics/app/bdas.go b/pkg/metrics/app/bdas.go index a178fb9..aec47db 100644 --- a/pkg/metrics/app/bdas.go +++ b/pkg/metrics/app/bdas.go @@ -121,12 +121,12 @@ echo "%s" } func init() { - launcher := metrics.LauncherWorker{ + base := metrics.BaseMetric{ Identifier: "app-bdas", Summary: "The big data analytic suite contains the K-Means observation label, PCA, and SVM benchmarks.", Container: "ghcr.io/converged-computing/metric-bdas:latest", } - + launcher := metrics.LauncherWorker{BaseMetric: base} BDAS := BDAS{LauncherWorker: launcher} metrics.Register(&BDAS) } diff --git a/pkg/metrics/app/hpl.go b/pkg/metrics/app/hpl.go index de334bf..cae9fcb 100644 --- a/pkg/metrics/app/hpl.go +++ b/pkg/metrics/app/hpl.go @@ -389,12 +389,12 @@ echo "%s" } func init() { - launcher := metrics.LauncherWorker{ + base := metrics.BaseMetric{ Identifier: "app-hpl", Summary: "High-Performance Linpack (HPL)", Container: "ghcr.io/converged-computing/metric-hpl-spack:latest", } - + launcher := metrics.LauncherWorker{BaseMetric: base} HPL := HPL{LauncherWorker: launcher} metrics.Register(&HPL) } diff --git a/pkg/metrics/app/kripke.go b/pkg/metrics/app/kripke.go index e5fffbe..631e999 100644 --- a/pkg/metrics/app/kripke.go +++ b/pkg/metrics/app/kripke.go @@ -59,10 +59,13 @@ func (n Kripke) ListOptions() map[string][]intstr.IntOrString { } func init() { + base := metrics.BaseMetric{ + Identifier: "app-kripke", + Summary: "parallel algebraic multigrid solver for linear systems arising from problems on unstructured grids", + Container: "ghcr.io/converged-computing/metric-kripke:latest", + } launcher := metrics.LauncherWorker{ - Identifier: "app-kripke", - Summary: "parallel algebraic multigrid solver for linear systems arising from problems on unstructured grids", - Container: "ghcr.io/converged-computing/metric-kripke:latest", + BaseMetric: base, WorkerScript: "/metrics_operator/kripke-worker.sh", LauncherScript: "/metrics_operator/kripke-launcher.sh", } diff --git a/pkg/metrics/app/laghos.go b/pkg/metrics/app/laghos.go index 9f78b0f..54ab552 100644 --- a/pkg/metrics/app/laghos.go +++ b/pkg/metrics/app/laghos.go @@ -50,12 +50,12 @@ func (m Laghos) Options() map[string]intstr.IntOrString { } func init() { - launcher := metrics.LauncherWorker{ + base := metrics.BaseMetric{ Identifier: "app-laghos", Summary: "LAGrangian High-Order Solver", Container: "ghcr.io/converged-computing/metric-laghos:latest", } - + launcher := metrics.LauncherWorker{BaseMetric: base} Laghos := Laghos{LauncherWorker: launcher} metrics.Register(&Laghos) } diff --git a/pkg/metrics/app/lammps.go b/pkg/metrics/app/lammps.go index 1c28de5..3c98c2e 100644 --- a/pkg/metrics/app/lammps.go +++ b/pkg/metrics/app/lammps.go @@ -66,10 +66,13 @@ func (m Lammps) Options() map[string]intstr.IntOrString { } func init() { + base := metrics.BaseMetric{ + Identifier: "app-lammps", + Summary: "LAMMPS molecular dynamic simulation", + Container: "ghcr.io/converged-computing/metric-lammps:latest", + } launcher := metrics.LauncherWorker{ - Identifier: "app-lammps", - Summary: "LAMMPS molecular dynamic simulation", - Container: "ghcr.io/converged-computing/metric-lammps:latest", + BaseMetric: base, WorkerScript: "/metrics_operator/lammps-worker.sh", LauncherScript: "/metrics_operator/lammps-launcher.sh", } diff --git a/pkg/metrics/app/ldms.go b/pkg/metrics/app/ldms.go index 7debdd6..08501ef 100644 --- a/pkg/metrics/app/ldms.go +++ b/pkg/metrics/app/ldms.go @@ -137,12 +137,12 @@ echo "%s" } func init() { - app := metrics.BaseMetric{ + base := metrics.BaseMetric{ Identifier: "app-ldms", Summary: "provides LDMS, a low-overhead, low-latency framework for collecting, transferring, and storing metric data on a large distributed computer system.", Container: "ghcr.io/converged-computing/metric-ovis-hpc:latest", } - single := metrics.SingleApplication{BaseMetric: app} + single := metrics.SingleApplication{BaseMetric: base} LDMS := LDMS{SingleApplication: single} metrics.Register(&LDMS) } diff --git a/pkg/metrics/app/nekbone.go b/pkg/metrics/app/nekbone.go index 0bd9b96..cc05541 100644 --- a/pkg/metrics/app/nekbone.go +++ b/pkg/metrics/app/nekbone.go @@ -50,11 +50,12 @@ func (m Nekbone) Options() map[string]intstr.IntOrString { } func init() { - launcher := metrics.LauncherWorker{ + base := metrics.BaseMetric{ Identifier: "app-nekbone", Summary: "A mini-app derived from the Nek5000 CFD code which is a high order, incompressible Navier-Stokes CFD solver based on the spectral element method. The conjugate gradiant solve is compute intense, contains small messages and frequent allreduces.", Container: "ghcr.io/converged-computing/metric-nekbone:latest", } + launcher := metrics.LauncherWorker{BaseMetric: base} Nekbone := Nekbone{LauncherWorker: launcher} metrics.Register(&Nekbone) } diff --git a/pkg/metrics/app/pennant.go b/pkg/metrics/app/pennant.go index b3cff9c..494431f 100644 --- a/pkg/metrics/app/pennant.go +++ b/pkg/metrics/app/pennant.go @@ -50,10 +50,13 @@ func (m Pennant) Options() map[string]intstr.IntOrString { } func init() { + base := metrics.BaseMetric{ + Identifier: "app-pennant", + Summary: "Unstructured mesh hydrodynamics for advanced architectures ", + Container: "ghcr.io/converged-computing/metric-pennant:latest", + } launcher := metrics.LauncherWorker{ - Identifier: "app-pennant", - Summary: "Unstructured mesh hydrodynamics for advanced architectures ", - Container: "ghcr.io/converged-computing/metric-pennant:latest", + BaseMetric: base, WorkerScript: "/metrics_operator/pennant-worker.sh", LauncherScript: "/metrics_operator/pennant-launcher.sh", } diff --git a/pkg/metrics/app/quicksilver.go b/pkg/metrics/app/quicksilver.go index a61e764..7fa8741 100644 --- a/pkg/metrics/app/quicksilver.go +++ b/pkg/metrics/app/quicksilver.go @@ -50,10 +50,13 @@ func (m Quicksilver) Options() map[string]intstr.IntOrString { } func init() { + base := metrics.BaseMetric{ + Identifier: "app-quicksilver", + Summary: "A proxy app for the Monte Carlo Transport Code", + Container: "ghcr.io/converged-computing/metric-quicksilver:latest", + } launcher := metrics.LauncherWorker{ - Identifier: "app-quicksilver", - Summary: "A proxy app for the Monte Carlo Transport Code", - Container: "ghcr.io/converged-computing/metric-quicksilver:latest", + BaseMetric: base, WorkerScript: "/metrics_operator/quicksilver-worker.sh", LauncherScript: "/metrics_operator/quicksilver-launcher.sh", } diff --git a/pkg/metrics/application.go b/pkg/metrics/application.go index 681dc6f..08a71b7 100644 --- a/pkg/metrics/application.go +++ b/pkg/metrics/application.go @@ -65,6 +65,7 @@ func (m *SingleApplication) ReplicatedJobs(spec *api.MetricSet) ([]*jobset.Repli js := []*jobset.ReplicatedJob{} // Generate a replicated job for the applicatino + // An empty jobname will default to "m" the ReplicatedJobName provided by the operator rj, err := AssembleReplicatedJob(spec, true, spec.Spec.Pods, spec.Spec.Pods, "", m.SoleTenancy) if err != nil { return js, err diff --git a/pkg/metrics/base.go b/pkg/metrics/base.go index faa2c17..e07b190 100644 --- a/pkg/metrics/base.go +++ b/pkg/metrics/base.go @@ -33,8 +33,12 @@ type BaseMetric struct { } // RegisterAddon adds an addon to the set, assuming it's already validated -func (m BaseMetric) RegisterAddon(addon *addons.Addon) { +func (m *BaseMetric) RegisterAddon(addon *addons.Addon) { a := (*addon) + if m.Addons == nil { + m.Addons = map[string]*addons.Addon{} + } + logger.Infof("🟧️ Registering addon %s", a) m.Addons[a.Name()] = addon } @@ -68,6 +72,10 @@ func (m BaseMetric) Attributes() *api.ContainerSpec { // Validation func (m BaseMetric) Validate(set *api.MetricSet) bool { + if m.Identifier == "" { + logger.Errorf("Metric %s is missing an identifier.\n", m) + return false + } return true } @@ -80,14 +88,24 @@ func (m BaseMetric) SuccessJobs() []string { return []string{} } -func (m BaseMetric) ReplicatedJobs(set *api.MetricSet) ([]*jobset.ReplicatedJob, error) { - return []*jobset.ReplicatedJob{}, nil -} - func (m BaseMetric) HasSoleTenancy() bool { return m.SoleTenancy } +// Default replicated jobs will generate for N pods, with no shared process namespace (e.g., storage) +func (m *BaseMetric) ReplicatedJobs(spec *api.MetricSet) ([]*jobset.ReplicatedJob, error) { + + js := []*jobset.ReplicatedJob{} + + // An empty jobname will default to "m" the ReplicatedJobName provided by the operator + rj, err := AssembleReplicatedJob(spec, false, spec.Spec.Pods, spec.Spec.Pods, "", m.SoleTenancy) + if err != nil { + return js, err + } + js = []*jobset.ReplicatedJob{rj} + return js, nil +} + // SetDefaultOptions that are shared (possibly) func (m BaseMetric) SetDefaultOptions(metric *api.Metric) { st, ok := metric.Options["soleTenancy"] @@ -110,17 +128,20 @@ func (m BaseMetric) AddAddons( // VolumeMounts can be generated from container specs // For each addon, do custom logic depending on the type // These are the main set of volumes, containers we are going to add - volumes := []specs.VolumeSpec{} + // Organize volumes by unique name + volumes := map[string]specs.VolumeSpec{} // These are addon container specs addonContainers := []specs.ContainerSpec{} - for _, addon := range m.Addons { + logger.Infof("🟧️ Addons to include %s\n", m.Addons) + for _, addon := range m.Addons { a := (*addon) - // Assemble volume specs that addons provide - // These are assumed to exist, and we create mounts for them only - volumes = append(volumes, a.AssembleVolumes()...) + assembledVolume := a.AssembleVolumes() + if assembledVolume.Volume.Name != "" { + volumes[a.Name()] = assembledVolume + } // Assemble containers that addons provide, also as specs addonContainers = append(addonContainers, a.AssembleContainers()...) @@ -130,6 +151,13 @@ func (m BaseMetric) AddAddons( a.CustomizeEntrypoints(containerSpecs, rjs) } + // There is a bug here showing lots of nil but I don't know why + logger.Infof("🟧️ Volumes that are going to be added %s\n", volumes) + listing := []specs.VolumeSpec{} + for _, volume := range volumes { + listing = append(listing, volume) + } + // Add containers to the replicated job (filtered based on matching names) containers := addonContainers for _, cs := range containerSpecs { @@ -140,7 +168,7 @@ func (m BaseMetric) AddAddons( for _, rj := range rjs { // We also include the addon volumes, which generally need mount points - rjContainers, err := getReplicatedJobContainers(spec, rj, containers, volumes) + rjContainers, err := getReplicatedJobContainers(spec, rj, containers, listing) if err != nil { return err } @@ -149,7 +177,7 @@ func (m BaseMetric) AddAddons( // And volumes! // containerSpecs are used to generate our metric entrypoint volumes // volumes indicate existing volumes - rj.Template.Spec.Template.Spec.Volumes = getReplicatedJobVolumes(spec, containerSpecs, volumes) + rj.Template.Spec.Template.Spec.Volumes = getReplicatedJobVolumes(spec, containerSpecs, listing) } return nil } diff --git a/pkg/metrics/io/fio.go b/pkg/metrics/io/fio.go new file mode 100644 index 0000000..240ca6f --- /dev/null +++ b/pkg/metrics/io/fio.go @@ -0,0 +1,165 @@ +/* +Copyright 2023 Lawrence Livermore National Security, LLC + (c.f. AUTHORS, NOTICE.LLNS, COPYING) + +SPDX-License-Identifier: MIT +*/ + +package io + +import ( + "fmt" + + api "github.com/converged-computing/metrics-operator/api/v1alpha1" + "k8s.io/apimachinery/pkg/util/intstr" + + "github.com/converged-computing/metrics-operator/pkg/metadata" + metrics "github.com/converged-computing/metrics-operator/pkg/metrics" + "github.com/converged-computing/metrics-operator/pkg/specs" +) + +// FIO means Flexible IO +// https://docs.gitlab.com/ee/administration/operations/filesystem_benchmarking.html + +type Fio struct { + metrics.StorageGeneric + + // Options + testname string + blocksize string + iodepth int + size string + directory string + + // extra commands for pre, post, etc. + pre string + post string + prefix string +} + +func (m Fio) Url() string { + return "https://fio.readthedocs.io/en/latest/fio_doc.html" +} + +// Set custom options / attributes for the metric +func (m *Fio) SetOptions(metric *api.Metric) { + m.ResourceSpec = &metric.Resources + m.AttributeSpec = &metric.Attributes + + // Set defaults for options + m.testname = "test" + m.blocksize = "4k" + m.iodepth = 64 + m.size = "4G" + m.directory = "/tmp" + + v, ok := metric.Options["testname"] + if ok { + m.testname = v.StrVal + } + v, ok = metric.Options["blocksize"] + if ok { + m.blocksize = v.StrVal + } + v, ok = metric.Options["size"] + if ok { + m.size = v.StrVal + } + v, ok = metric.Options["directory"] + if ok { + m.directory = v.StrVal + } + v, ok = metric.Options["iodepth"] + if ok { + m.iodepth = int(v.IntVal) + } + v, ok = metric.Options["prefix"] + if ok { + m.prefix = v.StrVal + } + v, ok = metric.Options["pre"] + if ok { + m.pre = v.StrVal + } + v, ok = metric.Options["post"] + if ok { + m.post = v.StrVal + } +} + +func (m Fio) PrepareContainers( + spec *api.MetricSet, + metric *metrics.Metric, +) []*specs.ContainerSpec { + + // Metadata to add to beginning of run + meta := metrics.Metadata(spec, metric) + + preBlock := `#!/bin/bash +echo "%s" +# Directory (and filename) for test assuming other storage mounts +filename=%s/test-$(cat /dev/urandom | tr -cd 'a-f0-9' | head -c 32) +# Run the pre-command here so it has access to the filename. +%s +command="%s fio --randrepeat=1 --ioengine=libaio --direct=1 --gtod_reduce=1 --name=%s --bs=%s --iodepth=%d --readwrite=randrw --rwmixread=75 --size=%s --filename=$filename --output-format=json" +echo "FIO COMMAND START" +echo $command +echo "FIO COMMAND END" +# FIO just has one command, we don't need to think about completions / etc! +echo "%s" +echo "%s" +` + preBlock = fmt.Sprintf( + preBlock, + meta, + m.directory, + m.pre, + m.prefix, + m.testname, + m.blocksize, + m.iodepth, + m.size, + metadata.CollectionStart, + metadata.Separator, + ) + + postBlock := ` +echo "%s" +# Run command here so it's after collection finish, but before removing the filename +%s +%s rm -rf $filename +%s +` + + interactive := metadata.Interactive(spec.Spec.Logging.Interactive) + postBlock = fmt.Sprintf( + postBlock, + metadata.CollectionEnd, + m.post, + m.prefix, + interactive, + ) + return m.StorageContainerSpec(preBlock, "$command", postBlock) +} + +// Exported options and list options +func (m Fio) Options() map[string]intstr.IntOrString { + return map[string]intstr.IntOrString{ + "testname": intstr.FromString(m.testname), + "blocksize": intstr.FromString(m.blocksize), + "iodepth": intstr.FromInt(m.iodepth), + "size": intstr.FromString(m.size), + "directory": intstr.FromString(m.directory), + } +} + +func init() { + base := metrics.BaseMetric{ + Identifier: "io-fio", + Summary: "Flexible IO Tester (FIO)", + Container: "ghcr.io/converged-computing/metric-fio:latest", + } + storage := metrics.StorageGeneric{BaseMetric: base} + fio := Fio{StorageGeneric: storage} + metrics.Register(&fio) +} diff --git a/pkg/metrics/launcher.go b/pkg/metrics/launcher.go index 89c22d5..3b5b9d6 100644 --- a/pkg/metrics/launcher.go +++ b/pkg/metrics/launcher.go @@ -53,16 +53,6 @@ type LauncherWorker struct { WorkerLetter string } -// Name returns the metric name -func (m LauncherWorker) Name() string { - return m.Identifier -} - -// Description returns the metric description -func (m LauncherWorker) Description() string { - return m.Summary -} - // Family returns a generic performance family func (m LauncherWorker) Family() string { return PerformanceFamily @@ -74,22 +64,6 @@ func (m *LauncherWorker) SuccessJobs() []string { return []string{m.LauncherLetter} } -// Container variables -func (n LauncherWorker) Image() string { - return n.Container -} -func (m LauncherWorker) WorkingDir() string { - return m.Workdir -} - -// Return container resources for the metric container -func (m LauncherWorker) Resources() *api.ContainerResources { - return m.ResourceSpec -} -func (m LauncherWorker) Attributes() *api.ContainerSpec { - return m.AttributeSpec -} - // Set default options / attributes for the launcher metric func (m *LauncherWorker) SetDefaultOptions(metric *api.Metric) { m.ResourceSpec = &metric.Resources diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 47fea29..134e24f 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -19,7 +19,7 @@ import ( ) var ( - Registry = make(map[string]Metric) + Registry = map[string]Metric{} ) // A general metric is a container added to a JobSet @@ -61,6 +61,7 @@ type Metric interface { // GetMetric returns a metric, if it is known to the metrics operator // We also confirm that the addon exists, validate, and instantiate it. func GetMetric(metric *api.Metric, set *api.MetricSet) (Metric, error) { + if _, ok := Registry[metric.Name]; ok { m := Registry[metric.Name] @@ -90,7 +91,7 @@ func GetMetric(metric *api.Metric, set *api.MetricSet) (Metric, error) { func Register(m Metric) { name := m.Name() if _, ok := Registry[name]; ok { - log.Fatalf("Metric: %s has already been added to the registry", name) + log.Fatalf("Metric: %s has already been added to the registry\n", m) } Registry[name] = m } diff --git a/pkg/metrics/storage.go b/pkg/metrics/storage.go index 36f0f29..2c2ff6a 100644 --- a/pkg/metrics/storage.go +++ b/pkg/metrics/storage.go @@ -7,6 +7,10 @@ SPDX-License-Identifier: MIT package metrics +import ( + "github.com/converged-computing/metrics-operator/pkg/specs" +) + // These are common templates for storage apps. // They define the interface of a Metric. @@ -23,3 +27,30 @@ func (m StorageGeneric) Family() string { func (m StorageGeneric) HasSoleTenancy() bool { return false } + +// StorageContainerSpec gets the storage container spec +// This is identical to the application spec and could be combined +func (m *StorageGeneric) StorageContainerSpec( + preBlock string, + command string, + postBlock string, +) []*specs.ContainerSpec { + + entrypoint := specs.EntrypointScript{ + Name: specs.DeriveScriptKey(DefaultEntrypointScript), + Path: DefaultEntrypointScript, + Pre: preBlock, + Command: command, + Post: postBlock, + } + + return []*specs.ContainerSpec{{ + JobName: ReplicatedJobName, + Image: m.Image(), + Name: "storage", + WorkingDir: m.Workdir, + EntrypointScript: entrypoint, + Resources: m.ResourceSpec, + Attributes: m.AttributeSpec, + }} +} diff --git a/pkg/metrics/volumes.go b/pkg/metrics/volumes.go index 1fea872..7144cfa 100644 --- a/pkg/metrics/volumes.go +++ b/pkg/metrics/volumes.go @@ -105,7 +105,9 @@ func getReplicatedJobVolumes( func getAddonVolumes(vs []specs.VolumeSpec) []corev1.Volume { volumes := []corev1.Volume{} for _, volume := range vs { + logger.Infof("Adding volume %s\n", &volume.Volume) volumes = append(volumes, volume.Volume) } + logger.Infof("Volumes %s\n", volumes) return volumes }