diff --git a/api/v1alpha2/metric_types.go b/api/v1alpha2/metric_types.go index 2cd1287..64e9cb1 100644 --- a/api/v1alpha2/metric_types.go +++ b/api/v1alpha2/metric_types.go @@ -176,7 +176,7 @@ type Metric struct { // Use a custom container image (advanced users only) // +optional - Image string `json:"image"` + Image string `json:"image,omitempty"` // A Metric addon can be storage (volume) or an application, // It's an additional entity that can customize a replicated job, diff --git a/docs/_static/data/metrics.json b/docs/_static/data/metrics.json index b5250ba..888f186 100644 --- a/docs/_static/data/metrics.json +++ b/docs/_static/data/metrics.json @@ -24,7 +24,7 @@ "name": "app-kripke", "description": "parallel algebraic multigrid solver for linear systems arising from problems on unstructured grids", "family": "solver", - "image": "ghcr.io/converged-computing/metric-kripke:latest", + "image": "ghcr.io/converged-computing/metric-kripke:latest", "url": "https://github.com/LLNL/Kripke" }, { diff --git a/examples/tests/perf-lammps-hpctoolkit/metrics-rocky.yaml b/examples/tests/perf-lammps-hpctoolkit/metrics-rocky.yaml new file mode 100644 index 0000000..92c878a --- /dev/null +++ b/examples/tests/perf-lammps-hpctoolkit/metrics-rocky.yaml @@ -0,0 +1,49 @@ +apiVersion: flux-framework.org/v1alpha2 +kind: MetricSet +metadata: + labels: + app.kubernetes.io/name: metricset + app.kubernetes.io/instance: metricset-sample + name: metricset-sample +spec: + # Number of pods for lammps (one launcher, the rest workers) + pods: 4 + logging: + interactive: true + + metrics: + + # Running more scaled lammps is our main goal + - name: app-lammps + + # How to define a custom lammps container (advanced users) + # This is for if you use rocky, not the default + image: ghcr.io/converged-computing/metric-lammps-intel-mpi:rocky + + options: + command: lmp -v x 2 -v y 2 -v z 2 -in in.reaxc.hns -nocite + workdir:
/opt/lammps/examples/reaxff/HNS + + # Add on hpctoolkit, will mount a volume and wrap lammps + addons: + - name: perf-hpctoolkit + options: + mount: /opt/mnt + # Where is the event blocked / taking more time + events: "-e REALTIME@100" + + # Use a custom container here too (we have for rocky and ubuntu) + image: ghcr.io/converged-computing/metric-hpctoolkit-view:rocky + + # Don't run post analysis - script will still be generated + # postAnalysis: "false" + + # hpcrun needs to have mpirun in front of hpcrun e.g., + # mpirun hpcrun + prefix: /opt/intel/mpi/2021.8.0/bin/mpirun --hostfile ./hostlist.txt -np 4 --map-by socket + + # Ensure the working directory is consistent + workdir: /opt/lammps/examples/reaxff/HNS + + # Target container for entrypoint addition is the launcher, not workers + containerTarget: launcher \ No newline at end of file diff --git a/examples/tests/perf-lammps-hpctoolkit/metrics.yaml b/examples/tests/perf-lammps-hpctoolkit/metrics.yaml index a779297..cd1b923 100644 --- a/examples/tests/perf-lammps-hpctoolkit/metrics.yaml +++ b/examples/tests/perf-lammps-hpctoolkit/metrics.yaml @@ -15,11 +15,6 @@ spec: # Running more scaled lammps is our main goal - name: app-lammps - - # How to define a custom lammps container (advanced users) - # This is for if you use rocky, not the default - # image: ghcr.io/converged-computing/metric-lammps-intel-mpi:rocky - options: command: lmp -v x 2 -v y 2 -v z 2 -in in.reaxc.hns -nocite workdir: /opt/lammps/examples/reaxff/HNS @@ -32,9 +27,6 @@ spec: # Where is the event blocked / taking more time events: "-e REALTIME@100" - # Use a custom container here too (we have for rocky and ubuntu) - # image: ghcr.io/converged-computing/metric-hpctoolkit-view:rocky - # Don't run post analysis - script will still be generated # postAnalysis: "false" @@ -42,9 +34,6 @@ spec: # mpirun hpcrun prefix: mpirun --hostfile ./hostlist.txt -np 4 --map-by socket - # This is for rocky - #prefix: /opt/intel/mpi/2021.8.0/bin/mpirun --hostfile 
./hostlist.txt -np 4 --map-by socket - # Ensure the working directory is consistent workdir: /opt/lammps/examples/reaxff/HNS diff --git a/pkg/addons/addons.go b/pkg/addons/addons.go index bac3b81..95a0cb7 100644 --- a/pkg/addons/addons.go +++ b/pkg/addons/addons.go @@ -10,6 +10,7 @@ package addons import ( "fmt" "log" + "reflect" jobset "sigs.k8s.io/jobset/api/jobset/v1alpha2" @@ -97,10 +98,17 @@ func (b AddonBase) MapOptions() map[string]map[string]intstr.IntOrString { // GetAddon looks up and validates an addon func GetAddon(a *api.MetricAddon) (Addon, error) { - addon, ok := Registry[a.Name] + + // We don't want to change the addon interface/struct itself + template, ok := Registry[a.Name] if !ok { return nil, fmt.Errorf("%s is not a known addon", a.Name) } + templateType := reflect.ValueOf(template) + if templateType.Kind() == reflect.Ptr { + templateType = reflect.Indirect(templateType) + } + addon := reflect.New(templateType.Type()).Interface().(Addon) // Set options before validation addon.SetOptions(a) diff --git a/pkg/metrics/app/amg.go b/pkg/metrics/app/amg.go index 46de65d..15dc22e 100644 --- a/pkg/metrics/app/amg.go +++ b/pkg/metrics/app/amg.go @@ -14,6 +14,12 @@ import ( metrics "github.com/converged-computing/metrics-operator/pkg/metrics" ) +const ( + amgIdentifier = "app-amg" + amgSummary = "parallel algebraic multigrid solver for linear systems arising from problems on unstructured grids" + amgContainer = "ghcr.io/converged-computing/metric-amg:latest" +) + // AMG is a launcher + workers metric application type AMG struct { metrics.LauncherWorker @@ -30,6 +36,12 @@ func (m AMG) Family() string { // Set custom options / attributes for the metric func (m *AMG) SetOptions(metric *api.Metric) { + + // TODO change these to class variables? then set in two places...
+ m.Identifier = amgIdentifier + m.Summary = amgSummary + m.Container = amgContainer + // Set user defined values or fall back to defaults m.Prefix = "mpirun --hostfile ./hostlist.txt" m.Command = "amg" @@ -53,15 +65,11 @@ func (m AMG) Options() map[string]intstr.IntOrString { func init() { base := metrics.BaseMetric{ - Identifier: "app-amg", - Summary: "parallel algebraic multigrid solver for linear systems arising from problems on unstructured grids", - Container: "ghcr.io/converged-computing/metric-amg:latest", - } - launcher := metrics.LauncherWorker{ - BaseMetric: base, - WorkerScript: "/metrics_operator/amg-worker.sh", - LauncherScript: "/metrics_operator/amg-launcher.sh", + Identifier: amgIdentifier, + Summary: amgSummary, + Container: amgContainer, } + launcher := metrics.LauncherWorker{BaseMetric: base} amg := AMG{LauncherWorker: launcher} metrics.Register(&amg) } diff --git a/pkg/metrics/app/bdas.go b/pkg/metrics/app/bdas.go index 2355cb5..d15a9c9 100644 --- a/pkg/metrics/app/bdas.go +++ b/pkg/metrics/app/bdas.go @@ -18,6 +18,12 @@ import ( "github.com/converged-computing/metrics-operator/pkg/specs" ) +const ( + bdasIdentifier = "app-bdas" + bdasSummary = "The big data analytic suite contains the K-Means observation label, PCA, and SVM benchmarks." 
+ bdasContainer = "ghcr.io/converged-computing/metric-bdas:latest" +) + type BDAS struct { metrics.LauncherWorker } @@ -34,6 +40,11 @@ func (m BDAS) Url() string { // Set custom options / attributes for the metric func (m *BDAS) SetOptions(metric *api.Metric) { + // Metadata + m.Identifier = bdasIdentifier + m.Summary = bdasSummary + m.Container = bdasContainer + // Set user defined values or fall back to defaults m.Prefix = "/bin/bash" m.Command = "mpirun --allow-run-as-root -np 4 --hostfile ./hostlist.txt Rscript /opt/bdas/benchmarks/r/princomp.r 250 50" @@ -118,9 +129,9 @@ echo "%s" func init() { base := metrics.BaseMetric{ - Identifier: "app-bdas", - Summary: "The big data analytic suite contains the K-Means observation label, PCA, and SVM benchmarks.", - Container: "ghcr.io/converged-computing/metric-bdas:latest", + Identifier: bdasIdentifier, + Summary: bdasSummary, + Container: bdasContainer, } launcher := metrics.LauncherWorker{BaseMetric: base} BDAS := BDAS{LauncherWorker: launcher} diff --git a/pkg/metrics/app/hpl.go b/pkg/metrics/app/hpl.go index 9ac414d..ab9ce75 100644 --- a/pkg/metrics/app/hpl.go +++ b/pkg/metrics/app/hpl.go @@ -21,6 +21,12 @@ import ( // https://www.netlib.org/benchmark/hpl/ // https://ulhpc-tutorials.readthedocs.io/en/production/parallel/mpi/HPL/ +const ( + hplIdentifier = "app-hpl" + hplSummary = "High-Performance Linpack (HPL)" + hplContainer = "ghcr.io/converged-computing/metric-hpl-spack:latest" +) + // Default input file hpl.dat // The output of this is Ns, memory is in GiB // -m 128 -NB 192 -r 0.3 -N 2: translates to --mem 128 -NB ${blocksize} -r 0.3 -N ${pods} @@ -122,6 +128,10 @@ func (m *HPL) SetOptions(metric *api.Metric) { m.ResourceSpec = &metric.Resources m.AttributeSpec = &metric.Attributes + m.Identifier = hplIdentifier + m.Summary = hplSummary + m.Container = hplContainer + // Defaults for hpl.dat values.
// memory and pods (nodes) calculated on the fly, unless otherwise provided m.ratio = "0.3" @@ -394,9 +404,9 @@ echo "%s" func init() { base := metrics.BaseMetric{ - Identifier: "app-hpl", - Summary: "High-Performance Linpack (HPL)", - Container: "ghcr.io/converged-computing/metric-hpl-spack:latest", + Identifier: hplIdentifier, + Summary: hplSummary, + Container: hplContainer, } launcher := metrics.LauncherWorker{BaseMetric: base} HPL := HPL{LauncherWorker: launcher} diff --git a/pkg/metrics/app/kripke.go b/pkg/metrics/app/kripke.go index 3f89c6c..bcd1099 100644 --- a/pkg/metrics/app/kripke.go +++ b/pkg/metrics/app/kripke.go @@ -14,6 +14,12 @@ import ( metrics "github.com/converged-computing/metrics-operator/pkg/metrics" ) +const ( + kripkeIdentifier = "app-kripke" + kripkeSummary = "parallel algebraic multigrid solver for linear systems arising from problems on unstructured grids" + kripkeContainer = "ghcr.io/converged-computing/metric-kripke:latest" +) + type Kripke struct { metrics.LauncherWorker } @@ -30,6 +36,10 @@ func (m Kripke) Family() string { // Set custom options / attributes for the metric func (m *Kripke) SetOptions(metric *api.Metric) { + m.Identifier = kripkeIdentifier + m.Summary = kripkeSummary + m.Container = kripkeContainer + // Set user defined values or fall back to defaults m.Prefix = "mpirun --hostfile ./hostlist.txt" m.Command = "kripke" @@ -56,15 +66,11 @@ func (n Kripke) ListOptions() map[string][]intstr.IntOrString { func init() { base := metrics.BaseMetric{ - Identifier: "app-kripke", - Summary: "parallel algebraic multigrid solver for linear systems arising from problems on unstructured grids", - Container: "ghcr.io/converged-computing/metric-kripke:latest", - } - launcher := metrics.LauncherWorker{ - BaseMetric: base, - WorkerScript: "/metrics_operator/kripke-worker.sh", - LauncherScript: "/metrics_operator/kripke-launcher.sh", + Identifier: kripkeIdentifier, + Summary: kripkeSummary, + Container: kripkeContainer, } + launcher :=
metrics.LauncherWorker{BaseMetric: base} kripke := Kripke{LauncherWorker: launcher} metrics.Register(&kripke) } diff --git a/pkg/metrics/app/laghos.go b/pkg/metrics/app/laghos.go index b6b74b0..c0a32de 100644 --- a/pkg/metrics/app/laghos.go +++ b/pkg/metrics/app/laghos.go @@ -14,6 +14,12 @@ import ( metrics "github.com/converged-computing/metrics-operator/pkg/metrics" ) +const ( + laghosIdentifier = "app-laghos" + laghosSummary = "LAGrangian High-Order Solver" + laghosContainer = "ghcr.io/converged-computing/metric-laghos:latest" +) + type Laghos struct { metrics.LauncherWorker } @@ -29,6 +35,11 @@ func (m Laghos) Url() string { // Set custom options / attributes for the metric func (m *Laghos) SetOptions(metric *api.Metric) { + + m.Identifier = laghosIdentifier + m.Summary = laghosSummary + m.Container = laghosContainer + // Set user defined values or fall back to defaults m.Prefix = "/bin/bash" m.Command = "mpirun -np 4 --hostfile ./hostlist.txt ./laghos" @@ -47,9 +58,9 @@ func (m Laghos) Options() map[string]intstr.IntOrString { func init() { base := metrics.BaseMetric{ - Identifier: "app-laghos", - Summary: "LAGrangian High-Order Solver", - Container: "ghcr.io/converged-computing/metric-laghos:latest", + Identifier: laghosIdentifier, + Summary: laghosSummary, + Container: laghosContainer, } launcher := metrics.LauncherWorker{BaseMetric: base} Laghos := Laghos{LauncherWorker: launcher} diff --git a/pkg/metrics/app/lammps.go b/pkg/metrics/app/lammps.go index 2cf7fa4..2c7849e 100644 --- a/pkg/metrics/app/lammps.go +++ b/pkg/metrics/app/lammps.go @@ -18,6 +18,12 @@ import ( "github.com/converged-computing/metrics-operator/pkg/specs" ) +const ( + lammpsIdentifier = "app-lammps" + lammpsSummary = "LAMMPS molecular dynamic simulation" + lammpsContainer = "ghcr.io/converged-computing/metric-lammps:latest" +) + type Lammps struct { metrics.LauncherWorker } @@ -33,6 +39,12 @@ func (m Lammps) Family() string { // Set custom options / attributes for the metric func (m
*Lammps) SetOptions(metric *api.Metric) { + + // Default metric options, these are overridden when we reflect + m.Identifier = lammpsIdentifier + m.Summary = lammpsSummary + m.Container = lammpsContainer + // Set user defined values or fall back to defaults // This is a more manual approach that puts the user in charge of determining the entire command // This more closely matches what we might do on HPC :) @@ -109,17 +121,14 @@ echo "%s" return []*specs.ContainerSpec{&launcherContainer, &workerContainer} } +// TODO can we have a new function instead? func init() { base := metrics.BaseMetric{ - Identifier: "app-lammps", - Summary: "LAMMPS molecular dynamic simulation", - Container: "ghcr.io/converged-computing/metric-lammps:latest", - } - launcher := metrics.LauncherWorker{ - BaseMetric: base, - WorkerScript: "/metrics_operator/lammps-worker.sh", - LauncherScript: "/metrics_operator/lammps-launcher.sh", + Identifier: lammpsIdentifier, + Summary: lammpsSummary, + Container: lammpsContainer, } + launcher := metrics.LauncherWorker{BaseMetric: base} lammps := Lammps{LauncherWorker: launcher} metrics.Register(&lammps) } diff --git a/pkg/metrics/app/ldms.go b/pkg/metrics/app/ldms.go index 5c77f52..f6534d2 100644 --- a/pkg/metrics/app/ldms.go +++ b/pkg/metrics/app/ldms.go @@ -18,6 +18,12 @@ import ( "github.com/converged-computing/metrics-operator/pkg/specs" ) +const ( + ldmsIdentifier = "app-ldms" + ldmsSummary = "provides LDMS, a low-overhead, low-latency framework for collecting, transferring, and storing metric data on a large distributed computer system." 
+ ldmsContainer = "ghcr.io/converged-computing/metric-ovis-hpc:latest" +) + type LDMS struct { metrics.SingleApplication @@ -40,11 +46,15 @@ func (m LDMS) Url() string { func (m *LDMS) SetOptions(metric *api.Metric) { m.ResourceSpec = &metric.Resources m.AttributeSpec = &metric.Attributes + + m.Identifier = ldmsIdentifier + m.Container = ldmsContainer + m.Summary = ldmsSummary m.rate = 10 // Set user defined values or fall back to defaults m.command = "ldms_ls -h localhost -x sock -p 10444 -l -v" - m.WorkingDir = "/opt" + m.Workdir = "/opt" command, ok := metric.Options["command"] if ok { @@ -52,7 +62,7 @@ func (m *LDMS) SetOptions(metric *api.Metric) { } workdir, ok := metric.Options["workdir"] if ok { - m.WorkingDir = workdir.StrVal + m.Workdir = workdir.StrVal } completions, ok := metric.Options["completions"] if ok { @@ -72,7 +82,7 @@ func (m LDMS) Options() map[string]intstr.IntOrString { "rate": intstr.FromInt(int(m.rate)), "completions": intstr.FromInt(int(m.completions)), "command": intstr.FromString(m.command), - "workdir": intstr.FromString(m.WorkingDir), + "workdir": intstr.FromString(m.Workdir), } } func (n LDMS) ListOptions() map[string][]intstr.IntOrString { @@ -138,9 +148,9 @@ echo "%s" func init() { base := metrics.BaseMetric{ - Identifier: "app-ldms", - Summary: "provides LDMS, a low-overhead, low-latency framework for collecting, transferring, and storing metric data on a large distributed computer system.", - Container: "ghcr.io/converged-computing/metric-ovis-hpc:latest", + Identifier: ldmsIdentifier, + Summary: ldmsSummary, + Container: ldmsContainer, } single := metrics.SingleApplication{BaseMetric: base} LDMS := LDMS{SingleApplication: single} diff --git a/pkg/metrics/app/nekbone.go b/pkg/metrics/app/nekbone.go index 586c473..21e7d34 100644 --- a/pkg/metrics/app/nekbone.go +++ b/pkg/metrics/app/nekbone.go @@ -14,6 +14,12 @@ import ( metrics "github.com/converged-computing/metrics-operator/pkg/metrics" ) +const ( + nekboneIdentifier = 
"app-nekbone" + nekboneSummary = "A mini-app derived from the Nek5000 CFD code which is a high order, incompressible Navier-Stokes CFD solver based on the spectral element method. The conjugate gradiant solve is compute intense, contains small messages and frequent allreduces." + nekboneContainer = "ghcr.io/converged-computing/metric-nekbone:latest" +) + type Nekbone struct { metrics.LauncherWorker } @@ -30,6 +36,9 @@ func (m Nekbone) Url() string { // Set custom options / attributes for the metric func (m *Nekbone) SetOptions(metric *api.Metric) { // Set user defined values or fall back to defaults + m.Identifier = nekboneIdentifier + m.Summary = nekboneSummary + m.Container = nekboneContainer m.Prefix = "/bin/bash" m.Command = "mpiexec --hostfile ./hostlist.txt -np 2 ./nekbone" m.Workdir = "/root/nekbone-3.0/test/example2" @@ -47,9 +56,9 @@ func (m Nekbone) Options() map[string]intstr.IntOrString { func init() { base := metrics.BaseMetric{ - Identifier: "app-nekbone", - Summary: "A mini-app derived from the Nek5000 CFD code which is a high order, incompressible Navier-Stokes CFD solver based on the spectral element method. 
The conjugate gradiant solve is compute intense, contains small messages and frequent allreduces.", - Container: "ghcr.io/converged-computing/metric-nekbone:latest", + Identifier: nekboneIdentifier, + Summary: nekboneSummary, + Container: nekboneContainer, } launcher := metrics.LauncherWorker{BaseMetric: base} Nekbone := Nekbone{LauncherWorker: launcher} diff --git a/pkg/metrics/app/pennant.go b/pkg/metrics/app/pennant.go index 20d2d1e..9f716ba 100644 --- a/pkg/metrics/app/pennant.go +++ b/pkg/metrics/app/pennant.go @@ -14,6 +14,12 @@ import ( metrics "github.com/converged-computing/metrics-operator/pkg/metrics" ) +const ( + pennantIdentifier = "app-pennant" + pennantSummary = "Unstructured mesh hydrodynamics for advanced architectures " + pennantContainer = "ghcr.io/converged-computing/metric-pennant:latest" +) + type Pennant struct { metrics.LauncherWorker } @@ -29,6 +35,11 @@ func (m Pennant) Url() string { // Set custom options / attributes for the metric func (m *Pennant) SetOptions(metric *api.Metric) { + + m.Container = pennantContainer + m.Identifier = pennantIdentifier + m.Summary = pennantSummary + // Set user defined values or fall back to defaults m.Prefix = "mpirun --hostfile ./hostlist.txt" m.Command = "pennant /opt/pennant/test/sedovsmall/sedovsmall.pnt" @@ -47,15 +58,11 @@ func (m Pennant) Options() map[string]intstr.IntOrString { func init() { base := metrics.BaseMetric{ - Identifier: "app-pennant", - Summary: "Unstructured mesh hydrodynamics for advanced architectures ", - Container: "ghcr.io/converged-computing/metric-pennant:latest", - } - launcher := metrics.LauncherWorker{ - BaseMetric: base, - WorkerScript: "/metrics_operator/pennant-worker.sh", - LauncherScript: "/metrics_operator/pennant-launcher.sh", + Identifier: pennantIdentifier, + Summary: pennantSummary, + Container: pennantContainer, } + launcher := metrics.LauncherWorker{BaseMetric: base} Pennant := Pennant{LauncherWorker: launcher} metrics.Register(&Pennant) } diff --git 
a/pkg/metrics/app/quicksilver.go b/pkg/metrics/app/quicksilver.go index 72ed53e..56477eb 100644 --- a/pkg/metrics/app/quicksilver.go +++ b/pkg/metrics/app/quicksilver.go @@ -14,6 +14,12 @@ import ( metrics "github.com/converged-computing/metrics-operator/pkg/metrics" ) +const ( + qsIdentifier = "app-quicksilver" + qsSummary = "A proxy app for the Monte Carlo Transport Code" + qsContainer = "ghcr.io/converged-computing/metric-quicksilver:latest" +) + type Quicksilver struct { metrics.LauncherWorker } @@ -29,6 +35,11 @@ func (m Quicksilver) Url() string { // Set custom options / attributes for the metric func (m *Quicksilver) SetOptions(metric *api.Metric) { + + m.Identifier = qsIdentifier + m.Summary = qsSummary + m.Container = qsContainer + // Set user defined values or fall back to defaults m.Prefix = "mpirun --hostfile ./hostlist.txt" m.Command = "qs /opt/quicksilver/Examples/CORAL2_Benchmark/Problem1/Coral2_P1.inp" @@ -47,15 +58,11 @@ func (m Quicksilver) Options() map[string]intstr.IntOrString { func init() { base := metrics.BaseMetric{ - Identifier: "app-quicksilver", - Summary: "A proxy app for the Monte Carlo Transport Code", - Container: "ghcr.io/converged-computing/metric-quicksilver:latest", - } - launcher := metrics.LauncherWorker{ - BaseMetric: base, - WorkerScript: "/metrics_operator/quicksilver-worker.sh", - LauncherScript: "/metrics_operator/quicksilver-launcher.sh", + Identifier: qsIdentifier, + Summary: qsSummary, + Container: qsContainer, } + launcher := metrics.LauncherWorker{BaseMetric: base} Quicksilver := Quicksilver{LauncherWorker: launcher} metrics.Register(&Quicksilver) } diff --git a/pkg/metrics/application.go b/pkg/metrics/application.go index e3d85fb..1b9997a 100644 --- a/pkg/metrics/application.go +++ b/pkg/metrics/application.go @@ -51,7 +51,7 @@ func (m *SingleApplication) ApplicationContainerSpec( JobName: ReplicatedJobName, Image: m.Image(), Name: "app", - WorkingDir: m.WorkingDir, + WorkingDir: m.Workdir, EntrypointScript: 
entrypoint, Resources: m.ResourceSpec, Attributes: m.AttributeSpec, diff --git a/pkg/metrics/base.go b/pkg/metrics/base.go index dde54b1..5e1ff4b 100644 --- a/pkg/metrics/base.go +++ b/pkg/metrics/base.go @@ -20,7 +20,7 @@ type BaseMetric struct { Identifier string Summary string Container string - WorkingDir string + Workdir string // A custom container can be used to replace the application // (typically advanced users only) diff --git a/pkg/metrics/io/fio.go b/pkg/metrics/io/fio.go index ea62193..37854b5 100644 --- a/pkg/metrics/io/fio.go +++ b/pkg/metrics/io/fio.go @@ -21,6 +21,12 @@ import ( // FIO means Flexible IO // https://docs.gitlab.com/ee/administration/operations/filesystem_benchmarking.html +const ( + fioIdentifier = "io-fio" + fioSummary = "Flexible IO Tester (FIO)" + fioContainer = "ghcr.io/converged-computing/metric-fio:latest" +) + type Fio struct { metrics.StorageGeneric @@ -49,6 +55,10 @@ func (m *Fio) SetOptions(metric *api.Metric) { m.ResourceSpec = &metric.Resources m.AttributeSpec = &metric.Attributes + m.Identifier = fioIdentifier + m.Summary = fioSummary + m.Container = fioContainer + // Set defaults for options m.testname = "test" m.blocksize = "4k" @@ -174,9 +184,9 @@ func (m Fio) Options() map[string]intstr.IntOrString { func init() { base := metrics.BaseMetric{ - Identifier: "io-fio", - Summary: "Flexible IO Tester (FIO)", - Container: "ghcr.io/converged-computing/metric-fio:latest", + Identifier: fioIdentifier, + Summary: fioSummary, + Container: fioContainer, } storage := metrics.StorageGeneric{BaseMetric: base} fio := Fio{StorageGeneric: storage} diff --git a/pkg/metrics/io/ior.go b/pkg/metrics/io/ior.go index 74783fa..3890f0e 100644 --- a/pkg/metrics/io/ior.go +++ b/pkg/metrics/io/ior.go @@ -18,6 +18,12 @@ import ( "github.com/converged-computing/metrics-operator/pkg/specs" ) +const ( + iorIdentifier = "io-ior" + iorSummary = "HPC IO Benchmark" + iorContainer = "ghcr.io/converged-computing/metric-ior:latest" +) + // Ior means 
Flexible IO // https://docs.gitlab.com/ee/administration/operations/filesystem_benchmarking.html @@ -40,6 +46,10 @@ func (m *Ior) SetOptions(metric *api.Metric) { m.ResourceSpec = &metric.Resources m.AttributeSpec = &metric.Attributes + m.Identifier = iorIdentifier + m.Container = iorContainer + m.Summary = iorSummary + // Set defaults for options m.command = "ior -w -r -o testfile" m.workdir = "/opt/ior" @@ -115,9 +125,9 @@ func (m Ior) Options() map[string]intstr.IntOrString { func init() { base := metrics.BaseMetric{ - Identifier: "io-ior", - Summary: "HPC IO Benchmark", - Container: "ghcr.io/converged-computing/metric-ior:latest", + Identifier: iorIdentifier, + Summary: iorSummary, + Container: iorContainer, } storage := metrics.StorageGeneric{BaseMetric: base} Ior := Ior{StorageGeneric: storage} diff --git a/pkg/metrics/io/sysstat.go b/pkg/metrics/io/sysstat.go index e572892..6c52241 100644 --- a/pkg/metrics/io/sysstat.go +++ b/pkg/metrics/io/sysstat.go @@ -19,6 +19,12 @@ import ( "github.com/converged-computing/metrics-operator/pkg/specs" ) +const ( + iostatIdentifier = "io-sysstat" + iostatSummary = "statistics for Linux tasks (processes) : I/O, CPU, memory, etc." 
+ iostatContainer = "ghcr.io/converged-computing/metric-sysstat:latest" +) + // sysstat provides a tool "iostat" to assess a storage mount // https://github.com/sysstat/sysstat @@ -39,6 +45,11 @@ func (m IOStat) Url() string { // Set custom options / attributes for the metric func (m *IOStat) SetOptions(metric *api.Metric) { + + m.Identifier = iostatIdentifier + m.Summary = iostatSummary + m.Container = iostatContainer + m.rate = 10 m.completions = 0 // infinite m.ResourceSpec = &metric.Resources @@ -138,9 +149,9 @@ func (m IOStat) Options() map[string]intstr.IntOrString { func init() { base := metrics.BaseMetric{ - Identifier: "io-sysstat", - Summary: "statistics for Linux tasks (processes) : I/O, CPU, memory, etc.", - Container: "ghcr.io/converged-computing/metric-sysstat:latest", + Identifier: iostatIdentifier, + Summary: iostatSummary, + Container: iostatContainer, } storage := metrics.StorageGeneric{BaseMetric: base} iostat := IOStat{StorageGeneric: storage} diff --git a/pkg/metrics/launcher.go b/pkg/metrics/launcher.go index 8496b3d..b13d5d7 100644 --- a/pkg/metrics/launcher.go +++ b/pkg/metrics/launcher.go @@ -29,11 +29,6 @@ var ( // be accessible by other packages (and not conflict with function names) type LauncherWorker struct { BaseMetric - - Identifier string - Summary string - Container string - Workdir string ResourceSpec *api.ContainerResources AttributeSpec *api.ContainerSpec diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index ceb680a..b85ebf3 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -10,6 +10,7 @@ package metrics import ( "fmt" "log" + "reflect" api "github.com/converged-computing/metrics-operator/api/v1alpha2" addons "github.com/converged-computing/metrics-operator/pkg/addons" @@ -63,7 +64,15 @@ type Metric interface { func GetMetric(metric *api.Metric, set *api.MetricSet) (Metric, error) { if _, ok := Registry[metric.Name]; ok { - m := Registry[metric.Name] + + // Start with the empty template, and create 
a copy + // This is important so we don't preserve state to the actual interface + template := Registry[metric.Name] + templateType := reflect.ValueOf(template) + if templateType.Kind() == reflect.Ptr { + templateType = reflect.Indirect(templateType) + } + m := reflect.New(templateType.Type()).Interface().(Metric) // Set global and custom options on the registry metric from the CRD m.SetOptions(metric) diff --git a/pkg/metrics/network/chatterbug.go b/pkg/metrics/network/chatterbug.go index ce5fd76..b96d0af 100644 --- a/pkg/metrics/network/chatterbug.go +++ b/pkg/metrics/network/chatterbug.go @@ -22,6 +22,12 @@ import ( // ghcr.io/converged-computing/metric-osu-benchmark:latest // https://mvapich.cse.ohio-state.edu/benchmarks/ +const ( + cbIdentifier = "network-chatterbug" + cbSummary = "A suite of communication proxies for HPC applications" + cbContainer = "ghcr.io/converged-computing/metric-chatterbug:latest" +) + var ( // Directory (app) name and executable in /root/chatterbug @@ -64,6 +70,10 @@ func (m *Chatterbug) hasCommand(command string) bool { func (m *Chatterbug) SetOptions(metric *api.Metric) { m.lookup = map[string]bool{} + m.Identifier = cbIdentifier + m.Container = cbContainer + m.Summary = cbSummary + // Default command and args (for a demo) m.command = "stencil3d" m.args = "./stencil3d.x 2 2 2 10 10 10 4 1" @@ -220,15 +230,11 @@ echo "%s" func init() { base := metrics.BaseMetric{ - Identifier: "network-chatterbug", - Summary: "A suite of communication proxies for HPC applications", - Container: "ghcr.io/converged-computing/metric-chatterbug:latest", - } - launcher := metrics.LauncherWorker{ - BaseMetric: base, - WorkerScript: "/metrics_operator/chatterbug-worker.sh", - LauncherScript: "/metrics_operator/chatterbug-launcher.sh", + Identifier: cbIdentifier, + Summary: cbSummary, + Container: cbContainer, } + launcher := metrics.LauncherWorker{BaseMetric: base} bug := Chatterbug{LauncherWorker: launcher} metrics.Register(&bug) } diff --git
a/pkg/metrics/network/netmark.go b/pkg/metrics/network/netmark.go index f6c0153..898e18d 100644 --- a/pkg/metrics/network/netmark.go +++ b/pkg/metrics/network/netmark.go @@ -20,6 +20,11 @@ import ( ) // This library is currently private +const ( + netmarkIdentifier = "network-netmark" + netmarkSummary = "point to point networking tool" + netmarkContainer = "vanessa/netmark:latest" +) type Netmark struct { metrics.LauncherWorker @@ -58,6 +63,10 @@ func (m *Netmark) SetOptions(metric *api.Metric) { m.AttributeSpec = &metric.Attributes m.LauncherLetter = "n" + m.Identifier = netmarkIdentifier + m.Summary = netmarkSummary + m.Container = netmarkContainer + // One pod per hostname m.SoleTenancy = true @@ -224,15 +233,11 @@ echo "%s" func init() { base := metrics.BaseMetric{ - Identifier: "network-netmark", - Summary: "point to point networking tool", - Container: "vanessa/netmark:latest", - } - launcher := metrics.LauncherWorker{ - BaseMetric: base, - WorkerScript: "/metrics_operator/netmark-worker.sh", - LauncherScript: "/metrics_operator/netmark-launcher.sh", + Identifier: netmarkIdentifier, + Summary: netmarkSummary, + Container: netmarkContainer, } + launcher := metrics.LauncherWorker{BaseMetric: base} netmark := Netmark{LauncherWorker: launcher} metrics.Register(&netmark) } diff --git a/pkg/metrics/network/osu-benchmark.go b/pkg/metrics/network/osu-benchmark.go index b0747cd..2f8e960 100644 --- a/pkg/metrics/network/osu-benchmark.go +++ b/pkg/metrics/network/osu-benchmark.go @@ -21,6 +21,11 @@ import ( // ghcr.io/converged-computing/metric-osu-benchmark:latest // https://mvapich.cse.ohio-state.edu/benchmarks/ +const ( + OSUIdentifier = "network-osu-benchmark" + OSUSummary = "point to point MPI benchmarks" + OSUContainer = "ghcr.io/converged-computing/metric-osu-benchmark:latest" +) type BenchmarkConfig struct { Workdir string @@ -136,6 +141,11 @@ func (m *OSUBenchmark) addCommand(command string) { // Set custom options / attributes for the metric func (m 
*OSUBenchmark) SetOptions(metric *api.Metric) { + + m.Identifier = OSUIdentifier + m.Container = OSUContainer + m.Summary = OSUSummary + m.lookup = map[string]bool{} m.commands = []string{} m.sleep = 60 @@ -363,15 +373,11 @@ echo "%s" func init() { base := metrics.BaseMetric{ - Identifier: "network-osu-benchmark", - Summary: "point to point MPI benchmarks", - Container: "ghcr.io/converged-computing/metric-osu-benchmark:latest", - } - launcher := metrics.LauncherWorker{ - BaseMetric: base, - WorkerScript: "/metrics_operator/osu-worker.sh", - LauncherScript: "/metrics_operator/osu-launcher.sh", + Identifier: OSUIdentifier, + Summary: OSUSummary, + Container: OSUContainer, } + launcher := metrics.LauncherWorker{BaseMetric: base} osu := OSUBenchmark{LauncherWorker: launcher} metrics.Register(&osu) } diff --git a/pkg/metrics/perf/sysstat.go b/pkg/metrics/perf/sysstat.go index e26649a..c6d821e 100644 --- a/pkg/metrics/perf/sysstat.go +++ b/pkg/metrics/perf/sysstat.go @@ -18,6 +18,12 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" ) +const ( + pidstatIdentifier = "perf-sysstat" + pidstatSummary = "statistics for Linux tasks (processes) : I/O, CPU, memory, etc." 
+ pidstatContainer = "ghcr.io/converged-computing/metric-sysstat:latest" +) + // sysstat provides a tool "pidstat" that can monitor a PID (along with others) // https://github.com/sysstat/sysstat @@ -40,6 +46,11 @@ func (m PidStat) Url() string { // Set custom options / attributes for the metric func (m *PidStat) SetOptions(metric *api.Metric) { + + m.Identifier = pidstatIdentifier + m.Summary = pidstatSummary + m.Container = pidstatContainer + // Defaults for rate and completions m.rate = 10 m.completions = 0 // infinite @@ -257,9 +268,9 @@ done func init() { base := metrics.BaseMetric{ - Identifier: "perf-sysstat", - Summary: "statistics for Linux tasks (processes) : I/O, CPU, memory, etc.", - Container: "ghcr.io/converged-computing/metric-sysstat:latest", + Identifier: pidstatIdentifier, + Summary: pidstatSummary, + Container: pidstatContainer, } app := metrics.SingleApplication{BaseMetric: base} pidstat := PidStat{SingleApplication: app} diff --git a/pkg/metrics/storage.go b/pkg/metrics/storage.go index 93e9b26..2c2ff6a 100644 --- a/pkg/metrics/storage.go +++ b/pkg/metrics/storage.go @@ -48,7 +48,7 @@ func (m *StorageGeneric) StorageContainerSpec( JobName: ReplicatedJobName, Image: m.Image(), Name: "storage", - WorkingDir: m.WorkingDir, + WorkingDir: m.Workdir, EntrypointScript: entrypoint, Resources: m.ResourceSpec, Attributes: m.AttributeSpec,