Skip to content

Commit

Permalink
[nvidia] Capture more nvidia commands
Browse files Browse the repository at this point in the history
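Capture commands related to the NVIDIA Container Toolkit (`nvidia-ctk cdi list`, `nvidia-ctk --version`) and collect the CDI specification file `/etc/cdi/nvidia.yaml`.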

Related: RHEL-58172

Signed-off-by: Jose Castillo <[email protected]>
  • Loading branch information
jcastill authored and TurboTurtle committed Oct 8, 2024
1 parent b21be0b commit 757d2b3
Showing 1 changed file with 10 additions and 4 deletions.
14 changes: 10 additions & 4 deletions sos/report/plugins/nvidia.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,13 @@ class Nvidia(Plugin, IndependentPlugin):

short_desc = 'Nvidia GPU information'
plugin_name = 'nvidia'
commands = ('nvidia-smi',)
commands = ('nvidia-smi', 'nvidia-ctk',)
services = ('nvidia-persistenced', 'nvidia-fabricmanager',
'nvidia-toolkit-firstboot')

def setup(self):
self.add_copy_spec("/etc/cdi/nvidia.yaml")

subcmds = [
'--list-gpus',
'-q -d PERFORMANCE',
Expand All @@ -29,9 +33,12 @@ def setup(self):
'nvlink -s',
'nvlink -e'
]

self.add_service_status("nvidia-persistenced")
ctk_subcmds = [
'cdi list',
'--version',
]
self.add_cmd_output([f"nvidia-smi {cmd}" for cmd in subcmds])
self.add_cmd_output([f"nvidia-ctk {cmd}" for cmd in ctk_subcmds])

query = ('gpu_name,gpu_bus_id,vbios_version,temperature.gpu,'
'utilization.gpu,memory.total,memory.free,memory.used,'
Expand All @@ -42,6 +49,5 @@ def setup(self):
self.add_cmd_output(
f"nvidia-smi --query-retired-pages={querypages} --format=csv"
)
self.add_journal(boot=0, identifier='nvidia-persistenced')

# vim: set et ts=4 sw=4 :

0 comments on commit 757d2b3

Please sign in to comment.