Skip to content

Commit

Permalink
Fix #88 - spawn subprograms directly, not via shell
Browse files Browse the repository at this point in the history
  • Loading branch information
Lars T Hansen committed Apr 10, 2024
1 parent b4f3d50 commit c4bb676
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 19 deletions.
10 changes: 6 additions & 4 deletions src/amd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ use crate::util::map;
// too small. This is presumably all driver dependent.)

pub fn get_amd_configuration() -> Option<Vec<gpu::Card>> {
match command::safe_command("rocm-smi --showproductname", TIMEOUT_SECONDS) {
match command::safe_command("rocm-smi", &["--showproductname"], TIMEOUT_SECONDS) {
Ok(raw_text) => {
let mut cards = vec![];
for l in raw_text.lines() {
Expand Down Expand Up @@ -74,9 +74,9 @@ pub fn get_amd_information(user_by_pid: &UserTable) -> Result<Vec<gpu::Process>,
// I've not been able to combine the two invocations of rocm-smi yet; we have to run the command
// twice. Not a happy situation.

match command::safe_command(AMD_CONCISE_COMMAND, TIMEOUT_SECONDS) {
match command::safe_command(AMD_CONCISE_COMMAND, AMD_CONCISE_ARGS, TIMEOUT_SECONDS) {
Ok(concise_raw_text) => {
match command::safe_command(AMD_SHOWPIDGPUS_COMMAND, TIMEOUT_SECONDS) {
match command::safe_command(AMD_SHOWPIDGPUS_COMMAND, AMD_SHOWPIDGPUS_ARGS, TIMEOUT_SECONDS) {
Ok(showpidgpus_raw_text) => Ok(extract_amd_information(
&concise_raw_text,
&showpidgpus_raw_text,
Expand Down Expand Up @@ -180,6 +180,7 @@ PID 28154 is using 1 DRM device(s):
}

const AMD_CONCISE_COMMAND: &str = "rocm-smi";
const AMD_CONCISE_ARGS: &[&str] = &[];

// Return a vector of AMD GPU utilization indexed by device number: (gpu%, mem%)
//
Expand Down Expand Up @@ -238,7 +239,8 @@ GPU Temp (DieEdge) AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU%
assert!(xs.eq(&vec![(99.0, 57.0), (63.0, 5.0)]));
}

const AMD_SHOWPIDGPUS_COMMAND: &str = "rocm-smi --showpidgpus";
const AMD_SHOWPIDGPUS_COMMAND: &str = "rocm-smi";
const AMD_SHOWPIDGPUS_ARGS: &[&str] = &["--showpidgpus"];

// Return a vector of (PID, DEVICES) where DEVICES is a vector of the devices used by the PID. The
// PID is a string, the devices are numbers. See test cases below for the various forms
Expand Down
19 changes: 10 additions & 9 deletions src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ pub enum CmdError {
// especially at https://github.com/rust-lang/rust/issues/45572#issuecomment-860134955. See also
// https://doc.rust-lang.org/std/process/index.html (second code blob under "Handling I/O").

pub fn safe_command(command: &str, timeout_seconds: u64) -> Result<String, CmdError> {
let mut p = match Exec::shell(command)
pub fn safe_command(command: &str, args: &[&str], timeout_seconds: u64) -> Result<String, CmdError> {
let mut p = match Exec::cmd(command)
.args(args)
.stdout(Redirection::Pipe)
.stderr(Redirection::Pipe)
.popen()
Expand Down Expand Up @@ -162,43 +163,43 @@ fn format_failure(command: &str, stdout: &str, stderr: &str) -> String {
#[test]
fn test_safe_command() {
// Should work, because we should be running this in the repo root.
match safe_command("ls Cargo.toml", 2) {
match safe_command("ls", &["Cargo.toml"], 2) {
Ok(_) => {}
Err(_) => assert!(false),
}
// This really needs to be the output
assert!(safe_command("grep '^name =' Cargo.toml", 2) == Ok("name = \"sonar\"\n".to_string()));
assert!(safe_command("grep", &["^name =", "Cargo.toml"], 2) == Ok("name = \"sonar\"\n".to_string()));
// Not found
match safe_command("no-such-command-we-hope", 2) {
match safe_command("no-such-command-we-hope", &[], 2) {
Err(CmdError::CouldNotStart(_)) => {}
_ => {
assert!(false)
}
}
// Wrong permissions, not executable
match safe_command("/etc/passwd", 2) {
match safe_command("/etc/passwd", &[], 2) {
Err(CmdError::CouldNotStart(_)) => {}
_ => {
assert!(false)
}
}
// Should take too long
match safe_command("sleep 7", 2) {
match safe_command("sleep", &["7"], 2) {
Err(CmdError::Hung(_)) => {}
_ => {
assert!(false)
}
}
// Exited with error
match safe_command("ls /abracadabra", 2) {
match safe_command("ls", &["/abracadabra"], 2) {
Err(CmdError::Failed(_)) => {}
_ => {
assert!(false)
}
}
// Should work even though output is large, but we want a file that is always present. The file
// img/sonar.png is 1.2MB, which is probably good enough.
match safe_command("cat img/sonar.png", 5) {
match safe_command("cat", &["img/sonar.png"], 5) {
Ok(_) => {}
Err(_) => {
assert!(false)
Expand Down
15 changes: 9 additions & 6 deletions src/nvidia.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::util::map;
// Parsing all the output lines in order yields the information about all the cards.

pub fn get_nvidia_configuration() -> Option<Vec<gpu::Card>> {
match command::safe_command("nvidia-smi -a", TIMEOUT_SECONDS) {
match command::safe_command("nvidia-smi", &["-a"], TIMEOUT_SECONDS) {
Ok(raw_text) => {
let mut cards = vec![];
let mut looking_for_total = false;
Expand Down Expand Up @@ -77,10 +77,10 @@ pub fn get_nvidia_configuration() -> Option<Vec<gpu::Card>> {
// command could not be found, we return Ok(vec![]).

pub fn get_nvidia_information(user_by_pid: &UserTable) -> Result<Vec<gpu::Process>, String> {
match command::safe_command(NVIDIA_PMON_COMMAND, TIMEOUT_SECONDS) {
match command::safe_command(NVIDIA_PMON_COMMAND, NVIDIA_PMON_ARGS, TIMEOUT_SECONDS) {
Ok(pmon_raw_text) => {
let mut processes = parse_pmon_output(&pmon_raw_text, user_by_pid);
match command::safe_command(NVIDIA_QUERY_COMMAND, TIMEOUT_SECONDS) {
match command::safe_command(NVIDIA_QUERY_COMMAND, NVIDIA_QUERY_ARGS, TIMEOUT_SECONDS) {
Ok(query_raw_text) => {
processes.append(&mut parse_query_output(&query_raw_text, user_by_pid));
Ok(processes)
Expand All @@ -107,7 +107,8 @@ pub fn get_nvidia_information(user_by_pid: &UserTable) -> Result<Vec<gpu::Proces
// TODO: We could consider using the underlying C library instead, but this adds a fair
// amount of complexity. See the nvidia-smi manual page.

const NVIDIA_PMON_COMMAND: &str = "nvidia-smi pmon -c 1 -s mu";
const NVIDIA_PMON_COMMAND: &str = "nvidia-smi";
const NVIDIA_PMON_ARGS: &[&str] = &["pmon", "-c", "1", "-s", "mu"];

// Returns (user-name, pid, command-name) -> (device-mask, gpu-util-pct, gpu-mem-pct, gpu-mem-size-in-kib)
// where the values are summed across all devices and the device-mask is a bitmask for the
Expand Down Expand Up @@ -154,8 +155,10 @@ fn parse_pmon_output(raw_text: &str, user_by_pid: &UserTable) -> Vec<gpu::Proces
// We use this to get information about processes that are not captured by pmon. It's hacky
// but it works.

const NVIDIA_QUERY_COMMAND: &str =
"nvidia-smi --query-compute-apps=pid,used_memory --format=csv,noheader,nounits";
const NVIDIA_QUERY_COMMAND: &str = "nvidia-smi";

const NVIDIA_QUERY_ARGS: &[&str] =
&["--query-compute-apps=pid,used_memory", "--format=csv,noheader,nounits"];

// Same signature as extract_nvidia_pmon_processes(), q.v. but user is always "_zombie_" and command
// is always "_unknown_". Only pids not in user_by_pid are returned.
Expand Down

0 comments on commit c4bb676

Please sign in to comment.