diff --git a/examples/addons/volumes/lammps-pvc.yaml b/examples/addons/volumes/lammps-pvc.yaml new file mode 100644 index 0000000..b96fae4 --- /dev/null +++ b/examples/addons/volumes/lammps-pvc.yaml @@ -0,0 +1,56 @@ +apiVersion: flux-framework.org/v1alpha2 +kind: MetricSet +metadata: + labels: + app.kubernetes.io/name: metricset + app.kubernetes.io/instance: metricset-sample + name: metricset-sample +spec: + # Number of pods for lammps (one launcher, the rest workers) + pods: 4 + + # Keep interactive so we can decide when to quit + logging: + interactive: true + + metrics: + + # Running more scaled lammps is our main goal + - name: app-lammps + + # This has intel mpi on rocky linux, as suggested by Google + image: ghcr.io/converged-computing/metric-lammps-intel-mpi:rocky + + options: + command: lmp -v x 2 -v y 2 -v z 2 -in in.reaxc.hns -nocite + workdir: /opt/lammps/examples/reaxff/HNS + soleTenancy: "true" + + # Add on hpctoolkit, will mount a volume and wrap lammps + addons: + + - name: volume-pvc + options: + name: data + claimName: data + path: /storage + readOnly: "false" + + - name: perf-hpctoolkit + options: + mount: /opt/mnt + # This allows us to ask where the event is blocked / taking more time + events: "-e REALTIME@10000" + + # Use a custom container here too (we have one for rocky and ubuntu) + image: ghcr.io/converged-computing/metric-hpctoolkit-view:rocky + + # hpcrun needs to have mpirun in front of hpcrun e.g., + # mpirun hpcrun + prefix: /opt/intel/mpi/2021.8.0/bin/mpirun --hostfile ./hostlist.txt -np 4 -map-by ppr:1:node + + # Ensure the working directory is consistent + workdir: /opt/lammps/examples/reaxff/HNS + + # Target container for entrypoint addition is the launcher, not workers + containerTarget: launcher diff --git a/pkg/addons/addons.go b/pkg/addons/addons.go index 030d4c6..f68e1e4 100644 --- a/pkg/addons/addons.go +++ b/pkg/addons/addons.go @@ -65,34 +65,34 @@ type AddonBase struct { mapOptions map[string]map[string]intstr.IntOrString } 
-func (b AddonBase) SetOptions(metric *api.MetricAddon) {} -func (b AddonBase) CustomizeEntrypoints([]*specs.ContainerSpec, []*jobset.ReplicatedJob) {} +func (b *AddonBase) SetOptions(metric *api.MetricAddon) {} +func (b *AddonBase) CustomizeEntrypoints([]*specs.ContainerSpec, []*jobset.ReplicatedJob) {} -func (b AddonBase) Validate() bool { +func (b *AddonBase) Validate() bool { return true } -func (b AddonBase) AssembleContainers() []specs.ContainerSpec { +func (b *AddonBase) AssembleContainers() []specs.ContainerSpec { return []specs.ContainerSpec{} } // Assemble Volumes (for now) just generates one -func (b AddonBase) AssembleVolumes() []specs.VolumeSpec { +func (b *AddonBase) AssembleVolumes() []specs.VolumeSpec { return []specs.VolumeSpec{} } -func (b AddonBase) Description() string { +func (b *AddonBase) Description() string { return b.Summary } -func (b AddonBase) Name() string { +func (b *AddonBase) Name() string { return b.Identifier } -func (b AddonBase) Options() map[string]intstr.IntOrString { +func (b *AddonBase) Options() map[string]intstr.IntOrString { return b.options } -func (b AddonBase) ListOptions() map[string][]intstr.IntOrString { +func (b *AddonBase) ListOptions() map[string][]intstr.IntOrString { return b.listOptions } -func (b AddonBase) MapOptions() map[string]map[string]intstr.IntOrString { +func (b *AddonBase) MapOptions() map[string]map[string]intstr.IntOrString { return b.mapOptions } diff --git a/pkg/addons/commands.go b/pkg/addons/commands.go index d231e94..e56f9d6 100644 --- a/pkg/addons/commands.go +++ b/pkg/addons/commands.go @@ -16,6 +16,11 @@ import ( jobset "sigs.k8s.io/jobset/api/jobset/v1alpha2" ) +const ( + commandsName = "commands" + perfCommandsName = "perf-commands" +) + // Perf addon expects the same command structure, but adds sys caps for trace and admin type PerfAddon struct { CommandAddon @@ -37,6 +42,11 @@ func (a *PerfAddon) CustomizeEntrypoints( } } +func (a *PerfAddon) SetOptions(metric *api.MetricAddon) { + 
a.Identifier = perfCommandsName + a.SetSharedCommandOptions(metric) +} + // addContainerCaps adds capabilities to a container spec func (a *PerfAddon) addContainerCaps( cs []*specs.ContainerSpec, @@ -92,8 +102,13 @@ func (m CommandAddon) Family() string { return AddonFamilyApplication } -// Set custom options / attributes for the metric func (a *CommandAddon) SetOptions(metric *api.MetricAddon) { + a.Identifier = commandsName + a.SetSharedCommandOptions(metric) +} + +// Set custom options / attributes for the metric +func (a *CommandAddon) SetSharedCommandOptions(metric *api.MetricAddon) { target, ok := metric.Options["target"] if ok { a.target = target.StrVal @@ -213,14 +228,14 @@ func init() { // Config map volume type base := AddonBase{ - Identifier: "commands", + Identifier: commandsName, Summary: "customize a metric's entrypoints", } app := CommandAddon{AddonBase: base} Register(&app) base = AddonBase{ - Identifier: "perf-commands", + Identifier: perfCommandsName, Summary: "customize a metric's entrypoints expecting performance tracing (adding ptrace and admin caps)", } cmd := CommandAddon{AddonBase: base} diff --git a/pkg/addons/hpctoolkit.go b/pkg/addons/hpctoolkit.go index d8c539e..25f32a6 100644 --- a/pkg/addons/hpctoolkit.go +++ b/pkg/addons/hpctoolkit.go @@ -25,6 +25,9 @@ import ( // hpcstruct hpctoolkit-sleep-measurements // hpcprof hpctoolkit-sleep-measurements // hpcviewer ./hpctoolkit-lmp-database +const ( + hpctoolkitIdentifier = "perf-hpctoolkit" +) type HPCToolkit struct { ApplicationAddon @@ -121,6 +124,7 @@ func (a *HPCToolkit) SetOptions(metric *api.MetricAddon) { a.volumeName = "hpctoolkit" a.output = "hpctoolkit-result" a.postAnalysis = true + a.Identifier = hpctoolkitIdentifier // UseColor set to anything means to use it output, ok := metric.Options["output"] @@ -399,7 +403,7 @@ sleep infinity func init() { base := AddonBase{ - Identifier: "perf-hpctoolkit", + Identifier: hpctoolkitIdentifier, Summary: "performance tools for measurement and 
analysis", } app := ApplicationAddon{AddonBase: base} diff --git a/pkg/addons/volumes.go b/pkg/addons/volumes.go index cbad87f..38825dc 100644 --- a/pkg/addons/volumes.go +++ b/pkg/addons/volumes.go @@ -19,6 +19,14 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" ) +const ( + hostPathName = "volume-hostpath" + pvcName = "volume-pvc" + emptyName = "volume-empty" + secretName = "volume-secret" + cmName = "volume-cm" +) + type VolumeBase struct { AddonBase readOnly bool @@ -98,6 +106,8 @@ func (v *ConfigMapVolume) Validate() bool { // Set custom options / attributes for the metric func (v *ConfigMapVolume) SetOptions(metric *api.MetricAddon) { + v.Identifier = cmName + // Set an empty list of items v.items = map[string]string{} @@ -190,6 +200,9 @@ func (v *PersistentVolumeClaim) Validate() bool { // Set custom options / attributes func (v *PersistentVolumeClaim) SetOptions(metric *api.MetricAddon) { + + v.Identifier = pvcName + claimName, ok := metric.Options["claimName"] if ok { v.claimName = claimName.StrVal @@ -207,8 +220,6 @@ func (v *PersistentVolumeClaim) AssembleVolumes() []specs.VolumeSpec { }, }, } - - // ConfigMaps have to be read only! return []specs.VolumeSpec{{ Volume: volume, Path: filepath.Dir(v.path), @@ -236,6 +247,8 @@ func (v *SecretVolume) Validate() bool { // Set custom options / attributes func (v *SecretVolume) SetOptions(metric *api.MetricAddon) { + + v.Identifier = secretName secretName, ok := metric.Options["secretName"] if ok { v.secretName = secretName.StrVal @@ -281,6 +294,8 @@ func (v *HostPathVolume) Validate() bool { // Set custom options / attributes func (v *HostPathVolume) SetOptions(metric *api.MetricAddon) { + v.Identifier = hostPathName + // Name is required! 
path, ok := metric.Options["hostPath"] if ok { @@ -319,6 +334,7 @@ func (v *EmptyVolume) Validate() bool { // Set custom options / attributes func (v *EmptyVolume) SetOptions(metric *api.MetricAddon) { + v.Identifier = emptyName name, ok := metric.Options["name"] if ok { v.name = name.StrVal @@ -347,7 +363,7 @@ func init() { // Config map volume type base := AddonBase{ - Identifier: "volume-cm", + Identifier: cmName, Summary: "config map volume type", } volBase := VolumeBase{AddonBase: base} @@ -356,7 +372,7 @@ func init() { // Secret volume type base = AddonBase{ - Identifier: "volume-secret", + Identifier: secretName, Summary: "secret volume type", } volBase = VolumeBase{AddonBase: base} @@ -365,7 +381,7 @@ func init() { // Hostpath volume type base = AddonBase{ - Identifier: "volume-hostpath", + Identifier: hostPathName, Summary: "host path volume type", } volBase = VolumeBase{AddonBase: base} @@ -374,7 +390,7 @@ func init() { // persistent volume claim volume type base = AddonBase{ - Identifier: "volume-pvc", + Identifier: pvcName, Summary: "persistent volume claim volume type", } volBase = VolumeBase{AddonBase: base} @@ -383,7 +399,7 @@ func init() { // EmptyVolume base = AddonBase{ - Identifier: "volume-empty", + Identifier: emptyName, Summary: "empty volume type", } volBase = VolumeBase{AddonBase: base} diff --git a/pkg/metrics/base.go b/pkg/metrics/base.go index 5e1ff4b..2237815 100644 --- a/pkg/metrics/base.go +++ b/pkg/metrics/base.go @@ -39,9 +39,10 @@ type BaseMetric struct { func (m *BaseMetric) RegisterAddon(addon *addons.Addon) { a := (*addon) if m.Addons == nil { + logger.Infof("🟧️ Resetting addons - they are unset.") m.Addons = map[string]*addons.Addon{} } - logger.Infof("🟧️ Registering addon %s", a) + logger.Infof("🟧️ Registering addon %s", a.Name()) m.Addons[a.Name()] = addon } @@ -142,10 +143,10 @@ func (m BaseMetric) AddAddons( // These are container specs that need to be written to configmaps cms := []*specs.ContainerSpec{} - logger.Infof("🟧️ 
Addons to include %s\n", m.Addons) for _, addon := range m.Addons { a := (*addon) + logger.Infof("🟧️ Including Addon %s", a.Name()) volumes = append(volumes, a.AssembleVolumes()...) // Assemble containers that addons provide, also as specs diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index b85ebf3..a86db07 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -85,10 +85,12 @@ func GetMetric(metric *api.Metric, set *api.MetricSet) (Metric, error) { // Register addons, meaning adding the spec but not instantiating yet (or should we?) for _, a := range metric.Addons { + logger.Infof("Attempting to add addon %s", a.Name) addon, err := addons.GetAddon(&a) if err != nil { return nil, fmt.Errorf("Addon %s for metric %s did not validate", a.Name, metric.Name) } + logger.Infof("Registering addon %s", a.Name) m.RegisterAddon(&addon) }