From b48d0e7a2a8a5dbf1504d8a1a26a46151ce8dcbb Mon Sep 17 00:00:00 2001 From: bpetit Date: Tue, 15 Aug 2023 15:01:07 +0200 Subject: [PATCH 01/33] ci: fixing output name of exe file from workflow --- .github/workflows/exe-release-prometheuspush.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/exe-release-prometheuspush.yml b/.github/workflows/exe-release-prometheuspush.yml index 2e8963ca..78bbaa46 100644 --- a/.github/workflows/exe-release-prometheuspush.yml +++ b/.github/workflows/exe-release-prometheuspush.yml @@ -9,7 +9,7 @@ on: - 'book.toml' - 'CONTRIBUTING.md' tags: [ 'v*.*.*', 'dev*.*.*' ] - branches: [ '311-github-workflow-to-build-and-publish-a-exemsi-file-including-signed-rapl-driver-at-each-tagrelease' ] + branches: [ '336-proper-handling-of-windows-service-management' ] env: WRD_VERSION: v0.0.2 @@ -75,9 +75,9 @@ jobs: Import-Module AWS.Tools.Installer Install-AWSToolsModule AWS.Tools.EC2,AWS.Tools.S3 -CleanUp -Confirm:$False Set-AWSCredential -AccessKey ${{ secrets.S3_ACCESS_KEY_ID }} -SecretKey ${{ secrets.S3_SECRET_ACCESS_KEY }} -StoreAs default - mv packaging/windows/Output/scaphandre_installer.exe scaphandre_${GITHUB_REF_NAME}_installer.exe + mv packaging/windows/Output/scaphandre_installer.exe scaphandre_${{ env.GITHUB_REF_NAME }}_installer.exe $clientconfig=@{ SignatureVersion="s3v4" ServiceUrl="https://s3.fr-par.scw.cloud" } - Write-S3Object -EndpointUrl "https://s3.fr-par.scw.cloud" -Region "fr-par" -BucketName "scaphandre" -File scaphandre_${GITHUB_REF_NAME}_installer.exe -key "x86_64/scaphandre_${GITHUB_REF_NAME}_installer.exe" -PublicReadOnly -ClientConfig $clientconfig \ No newline at end of file + Write-S3Object -EndpointUrl "https://s3.fr-par.scw.cloud" -Region "fr-par" -BucketName "scaphandre" -File scaphandre_${{ env.GITHUB_REF_NAME }}_installer.exe -key "x86_64/scaphandre_${GITHUB_REF_NAME}_installer.exe" -PublicReadOnly -ClientConfig $clientconfig \ No newline at end of file From 60eabe4b19c09a77e49faf64d4de3bb2ef0282c4 Mon Sep 17 00:00:00 2001 From: bpetit Date: Tue, 15 Aug 2023 15:16:13 +0200 Subject: [PATCH 02/33] ci: fixing output name of exe file from workflow --- .github/workflows/exe-release-prometheuspush.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/exe-release-prometheuspush.yml b/.github/workflows/exe-release-prometheuspush.yml index 78bbaa46..3976a05a 100644 --- a/.github/workflows/exe-release-prometheuspush.yml +++ b/.github/workflows/exe-release-prometheuspush.yml @@ -75,9 +75,9 @@ jobs: Import-Module AWS.Tools.Installer Install-AWSToolsModule AWS.Tools.EC2,AWS.Tools.S3 -CleanUp -Confirm:$False Set-AWSCredential -AccessKey ${{ secrets.S3_ACCESS_KEY_ID }} -SecretKey ${{ secrets.S3_SECRET_ACCESS_KEY }} -StoreAs default - mv packaging/windows/Output/scaphandre_installer.exe scaphandre_${{ env.GITHUB_REF_NAME }}_installer.exe + mv packaging/windows/Output/scaphandre_installer.exe scaphandre_${{ github.ref.name }}_installer.exe $clientconfig=@{ SignatureVersion="s3v4" ServiceUrl="https://s3.fr-par.scw.cloud" } - Write-S3Object -EndpointUrl "https://s3.fr-par.scw.cloud" -Region "fr-par" -BucketName "scaphandre" -File scaphandre_${{ env.GITHUB_REF_NAME }}_installer.exe -key "x86_64/scaphandre_${GITHUB_REF_NAME}_installer.exe" -PublicReadOnly -ClientConfig $clientconfig \ No newline at end of file + Write-S3Object -EndpointUrl "https://s3.fr-par.scw.cloud" -Region "fr-par" -BucketName "scaphandre" -File scaphandre_${{ env.GITHUB_REF_NAME }}_installer.exe -key "x86_64/scaphandre_${{ github.ref.name }}_installer.exe" -PublicReadOnly -ClientConfig $clientconfig \ No newline at end of file From d3829ebb63fd3c1399146caed42b65f48149dea5 Mon Sep 17 00:00:00 2001 From: bpetit Date: Tue, 15 Aug 2023 15:36:21 +0200 Subject: [PATCH 03/33] ci: fixing output name of exe file from workflow --- .github/workflows/exe-release-prometheuspush.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/exe-release-prometheuspush.yml b/.github/workflows/exe-release-prometheuspush.yml index 3976a05a..3056f635 100644 --- a/.github/workflows/exe-release-prometheuspush.yml +++ b/.github/workflows/exe-release-prometheuspush.yml @@ -75,9 +75,9 @@ jobs: Import-Module AWS.Tools.Installer Install-AWSToolsModule AWS.Tools.EC2,AWS.Tools.S3 -CleanUp -Confirm:$False Set-AWSCredential -AccessKey ${{ secrets.S3_ACCESS_KEY_ID }} -SecretKey ${{ secrets.S3_SECRET_ACCESS_KEY }} -StoreAs default - mv packaging/windows/Output/scaphandre_installer.exe scaphandre_${{ github.ref.name }}_installer.exe + mv packaging/windows/Output/scaphandre_installer.exe scaphandre_${{ github.ref_name }}_installer.exe $clientconfig=@{ SignatureVersion="s3v4" ServiceUrl="https://s3.fr-par.scw.cloud" } - Write-S3Object -EndpointUrl "https://s3.fr-par.scw.cloud" -Region "fr-par" -BucketName "scaphandre" -File scaphandre_${{ env.GITHUB_REF_NAME }}_installer.exe -key "x86_64/scaphandre_${{ github.ref.name }}_installer.exe" -PublicReadOnly -ClientConfig $clientconfig \ No newline at end of file + Write-S3Object -EndpointUrl "https://s3.fr-par.scw.cloud" -Region "fr-par" -BucketName "scaphandre" -File scaphandre_${{ env.GITHUB_REF_NAME }}_installer.exe -key "x86_64/scaphandre_${{ github.ref_name }}_installer.exe" -PublicReadOnly -ClientConfig $clientconfig \ No newline at end of file From 8b3ea5a598bb5f8adc8c91e1fab360b2de09597f Mon Sep 17 00:00:00 2001 From: bpetit Date: Tue, 15 Aug 2023 16:05:59 +0200 Subject: [PATCH 04/33] ci: fixing output name of exe file from workflow --- .github/workflows/exe-release-prometheuspush.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/exe-release-prometheuspush.yml b/.github/workflows/exe-release-prometheuspush.yml index 3056f635..650bfacb 100644 --- a/.github/workflows/exe-release-prometheuspush.yml +++ b/.github/workflows/exe-release-prometheuspush.yml @@ -80,4 +80,4 @@ jobs: SignatureVersion="s3v4" ServiceUrl="https://s3.fr-par.scw.cloud" } - Write-S3Object -EndpointUrl "https://s3.fr-par.scw.cloud" -Region "fr-par" -BucketName "scaphandre" -File scaphandre_${{ env.GITHUB_REF_NAME }}_installer.exe -key "x86_64/scaphandre_${{ github.ref_name }}_installer.exe" -PublicReadOnly -ClientConfig $clientconfig \ No newline at end of file + Write-S3Object -EndpointUrl "https://s3.fr-par.scw.cloud" -Region "fr-par" -BucketName "scaphandre" -File scaphandre_${{ github.ref_name }}_installer.exe -key "x86_64/scaphandre_${{ github.ref_name }}_installer.exe" -PublicReadOnly -ClientConfig $clientconfig \ No newline at end of file From 4da908db48bcc6a635ea3fb0f970d1b41876a2f8 Mon Sep 17 00:00:00 2001 From: bpetit Date: Wed, 16 Aug 2023 15:32:49 +0200 Subject: [PATCH 05/33] feat: enabling to stop the windows service --- src/main.rs | 55 +++++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/src/main.rs b/src/main.rs index 83db7998..50db79e0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -114,16 +114,18 @@ enum ExporterChoice { fn my_service_main(arguments: Vec) { if let Err(_e) = run_service(arguments) { // Handle errors in some way. + } } #[cfg(target_os = "windows")] fn run_service(_arguments: Vec) -> Result<()> { - #[cfg(target_os = "windows")] + let mut stop = false; let event_handler = move |control_event| -> ServiceControlHandlerResult { match control_event { ServiceControl::Stop => { // Handle stop event and return control back to the system. + stop = true; ServiceControlHandlerResult::NoError } // All services must accept Interrogate even if it's a no-op. @@ -131,33 +133,36 @@ fn run_service(_arguments: Vec) -> Result<()> { _ => ServiceControlHandlerResult::NotImplemented, } }; - #[cfg(target_os = "windows")] - if let Ok(system_handler) = service_control_handler::register("Scaphandre", event_handler) { - let next_status = ServiceStatus { - // Should match the one from system service registry - service_type: ServiceType::OWN_PROCESS, - // The new state - current_state: ServiceState::Running, - // Accept stop events when running - controls_accepted: ServiceControlAccept::STOP, - // Used to report an error when starting or stopping only, otherwise must be zero - exit_code: ServiceExitCode::Win32(0), - // Only used for pending states, otherwise must be zero - checkpoint: 0, - // Only used for pending states, otherwise must be zero - wait_hint: Duration::default(), - // Unused for setting status - process_id: None, - }; - - // Tell the system that the service is running now - if let Ok(_status_set) = system_handler.set_service_status(next_status) { - parse_cli_and_run_exporter(); + if ! stop { + if let Ok(system_handler) = service_control_handler::register("Scaphandre", event_handler) { + let next_status = ServiceStatus { + // Should match the one from system service registry + service_type: ServiceType::OWN_PROCESS, + // The new state + current_state: ServiceState::Running, + // Accept stop events when running + controls_accepted: ServiceControlAccept::STOP, + // Used to report an error when starting or stopping only, otherwise must be zero + exit_code: ServiceExitCode::Win32(0), + // Only used for pending states, otherwise must be zero + checkpoint: 0, + // Only used for pending states, otherwise must be zero + wait_hint: Duration::default(), + // Unused for setting status + process_id: None, + }; + + // Tell the system that the service is running now + if let Ok(_status_set) = system_handler.set_service_status(next_status) { + parse_cli_and_run_exporter(); + } else { + panic!("Couldn't set Windows service status."); + } } else { - panic!("Couldn't set Windows service status."); + panic!("Couldn't get Windows system events handler."); } } else { - panic!("Couldn't get Windows system events handler."); + panic!("Service has been stopped !"); } Ok(()) } From cb1b5f2ff6e436847d29f183b0a0e11186876f3c Mon Sep 17 00:00:00 2001 From: bpetit Date: Wed, 16 Aug 2023 16:05:43 +0200 Subject: [PATCH 06/33] feat: enabling to stop the windows service --- src/main.rs | 54 ++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/src/main.rs b/src/main.rs index 50db79e0..b71dba89 100644 --- a/src/main.rs +++ b/src/main.rs @@ -133,36 +133,36 @@ fn run_service(_arguments: Vec) -> Result<()> { _ => ServiceControlHandlerResult::NotImplemented, } }; - if ! stop { - if let Ok(system_handler) = service_control_handler::register("Scaphandre", event_handler) { - let next_status = ServiceStatus { - // Should match the one from system service registry - service_type: ServiceType::OWN_PROCESS, - // The new state - current_state: ServiceState::Running, - // Accept stop events when running - controls_accepted: ServiceControlAccept::STOP, - // Used to report an error when starting or stopping only, otherwise must be zero - exit_code: ServiceExitCode::Win32(0), - // Only used for pending states, otherwise must be zero - checkpoint: 0, - // Only used for pending states, otherwise must be zero - wait_hint: Duration::default(), - // Unused for setting status - process_id: None, - }; - - // Tell the system that the service is running now - if let Ok(_status_set) = system_handler.set_service_status(next_status) { - parse_cli_and_run_exporter(); - } else { - panic!("Couldn't set Windows service status."); - } + if let Ok(system_handler) = service_control_handler::register("Scaphandre", event_handler) { + let mut next_status = ServiceStatus { + // Should match the one from system service registry + service_type: ServiceType::OWN_PROCESS, + // The new state + current_state: ServiceState::Running, + // Accept stop events when running + controls_accepted: ServiceControlAccept::STOP, + // Used to report an error when starting or stopping only, otherwise must be zero + exit_code: ServiceExitCode::Win32(0), + // Only used for pending states, otherwise must be zero + checkpoint: 0, + // Only used for pending states, otherwise must be zero + wait_hint: Duration::default(), + // Unused for setting status + process_id: None, + }; + if stop { + next_status.current_state = ServiceState::StopPending; + next_status.exit_code = ServiceExitCode::Win32(0); + next_status.wait_hint = Duration::from_secs(1); + } + // Tell the system that the service is running now + if let Ok(_status_set) = system_handler.set_service_status(next_status) { + parse_cli_and_run_exporter(); } else { - panic!("Couldn't get Windows system events handler."); + panic!("Couldn't set Windows service status."); } } else { - panic!("Service has been stopped !"); + panic!("Couldn't get Windows system events handler."); } Ok(()) } From 3f96f3f05862f1e108ac57d2615d270262820227 Mon Sep 17 00:00:00 2001 From: bpetit Date: Wed, 16 Aug 2023 18:31:26 +0200 Subject: [PATCH 07/33] feat: enabling to stop the windows service --- src/exporters/json.rs | 3 ++- src/exporters/mod.rs | 15 ++++++++++- src/exporters/prometheus.rs | 2 +- src/exporters/prometheuspush.rs | 11 +++++++- src/exporters/qemu.rs | 3 ++- src/exporters/riemann.rs | 3 ++- src/exporters/stdout.rs | 3 ++- src/main.rs | 46 ++++++++++++++++++++++----------- 8 files changed, 64 insertions(+), 22 deletions(-) diff --git a/src/exporters/json.rs b/src/exporters/json.rs index c448cd4f..6733e46f 100644 --- a/src/exporters/json.rs +++ b/src/exporters/json.rs @@ -8,6 +8,7 @@ use std::{ path::{Path, PathBuf}, thread, time::{Duration, Instant}, + sync::mpsc::Receiver }; /// An Exporter that writes power consumption data of the host @@ -156,7 +157,7 @@ struct Report { impl Exporter for JsonExporter { /// Runs [iterate()] every `step` until `timeout` - fn run(&mut self) { + fn run(&mut self, channel: &Receiver) { let step = self.time_step; info!("Measurement step is: {step:?}"); diff --git a/src/exporters/mod.rs b/src/exporters/mod.rs index 32be23d9..3ece8fcb 100644 --- a/src/exporters/mod.rs +++ b/src/exporters/mod.rs @@ -25,6 +25,7 @@ use std::collections::HashMap; use std::fmt; use std::time::Duration; use utils::get_scaphandre_version; +use std::sync::mpsc::Receiver; #[cfg(feature = "containers")] use { docker_sync::{container::Container, Docker}, @@ -108,10 +109,22 @@ impl fmt::Debug for MetricValueType { /// with the structs provided by the sensor. pub trait Exporter { /// Runs the exporter. - fn run(&mut self); + fn run(&mut self, channel: &Receiver); /// The name of the kind of the exporter, for example "json". fn kind(&self) -> &str; + + fn watch_signal(&mut self, channel: &Receiver) -> Option { + match channel.try_recv() { + Ok(received) => { + info!("Received signal: {}", received); + Some(1) + }, + Err(_) => { + None + } + } + } } /// MetricGenerator is an exporter helper structure to collect Scaphandre metrics. diff --git a/src/exporters/prometheus.rs b/src/exporters/prometheus.rs index 05065cc2..3fe8a238 100644 --- a/src/exporters/prometheus.rs +++ b/src/exporters/prometheus.rs @@ -72,7 +72,7 @@ impl PrometheusExporter { impl Exporter for PrometheusExporter { /// Starts an HTTP server to expose the metrics in Prometheus format. - fn run(&mut self) { + fn run(&mut self, channel: Receiver) { info!( "{}: Starting Prometheus exporter", Utc::now().format("%Y-%m-%dT%H:%M:%S") diff --git a/src/exporters/prometheuspush.rs b/src/exporters/prometheuspush.rs index 73981e4f..0ff558c2 100644 --- a/src/exporters/prometheuspush.rs +++ b/src/exporters/prometheuspush.rs @@ -13,6 +13,7 @@ use isahc::{prelude::*, Request}; use std::fmt::Write; use std::thread; use std::time::Duration; +use std::sync::mpsc::Receiver; pub struct PrometheusPushExporter { topo: Topology, @@ -72,7 +73,7 @@ impl PrometheusPushExporter { } impl Exporter for PrometheusPushExporter { - fn run(&mut self) { + fn run(&mut self, channel: &Receiver) { info!( "{}: Starting Prometheus Push exporter", Utc::now().format("%Y-%m-%dT%H:%M:%S") @@ -96,6 +97,10 @@ impl Exporter for PrometheusPushExporter { ); loop { + if self.watch_signal(channel).is_some() { + info!("Daemon/Service has received a stop signal."); + break; + } metric_generator.topology.refresh(); metric_generator.gen_all_metrics(); let mut body = String::from(""); @@ -154,6 +159,10 @@ impl Exporter for PrometheusPushExporter { } } + if self.watch_signal(channel).is_some() { + info!("Daemon/Service has received a stop signal."); + break; + } thread::sleep(Duration::new(self.args.step, 0)); } } diff --git a/src/exporters/qemu.rs b/src/exporters/qemu.rs index de3355e5..239293de 100644 --- a/src/exporters/qemu.rs +++ b/src/exporters/qemu.rs @@ -2,6 +2,7 @@ use crate::exporters::Exporter; use crate::sensors::Topology; use crate::sensors::{utils::ProcessRecord, Sensor}; use std::{fs, io, thread, time}; +use std::sync::mpsc::Receiver; /// An Exporter that extracts power consumption data of running /// Qemu/KVM virtual machines on the host and store those data @@ -17,7 +18,7 @@ pub struct QemuExporter { impl Exporter for QemuExporter { /// Runs [iterate()] in a loop. - fn run(&mut self) { + fn run(&mut self, channel: Receiver) { info!("Starting qemu exporter"); let path = "/var/lib/libvirt/scaphandre"; let cleaner_step = 120; diff --git a/src/exporters/riemann.rs b/src/exporters/riemann.rs index 7635db04..5c36b2ca 100644 --- a/src/exporters/riemann.rs +++ b/src/exporters/riemann.rs @@ -11,6 +11,7 @@ use riemann_client::Client; use std::collections::HashMap; use std::convert::TryFrom; use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use std::sync::mpsc::Receiver; /// Riemann server default ipv4/ipv6 address const DEFAULT_IP_ADDRESS: &str = "localhost"; @@ -168,7 +169,7 @@ impl RiemannExporter { impl Exporter for RiemannExporter { /// Entry point of the RiemannExporter. - fn run(&mut self) { + fn run(&mut self, channel: Receiver) { info!( "{}: Starting Riemann exporter", Utc::now().format("%Y-%m-%dT%H:%M:%S") diff --git a/src/exporters/stdout.rs b/src/exporters/stdout.rs index e3d0717d..1132c99f 100644 --- a/src/exporters/stdout.rs +++ b/src/exporters/stdout.rs @@ -4,6 +4,7 @@ use regex::Regex; use std::fmt::Write; use std::thread; use std::time::{Duration, Instant}; +use std::sync::mpsc::Receiver; /// An Exporter that displays power consumption data of the host /// and its processes on the standard output of the terminal. @@ -53,7 +54,7 @@ pub struct ExporterArgs { impl Exporter for StdoutExporter { /// Runs [iterate()] every `step` until `timeout` - fn run(&mut self) { + fn run(&mut self, channel: &Receiver) { let time_step = Duration::from_secs(self.args.step); let time_limit = if self.args.timeout < 0 { None diff --git a/src/main.rs b/src/main.rs index b71dba89..6c43193e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,7 @@ use clap::{command, ArgAction, Parser, Subcommand}; use colored::Colorize; use scaphandre::{exporters, sensors::Sensor}; +use std::sync::mpsc::{self, Receiver}; #[cfg(target_os = "linux")] use scaphandre::sensors::powercap_rapl; @@ -112,20 +113,19 @@ enum ExporterChoice { #[cfg(target_os = "windows")] fn my_service_main(arguments: Vec) { - if let Err(_e) = run_service(arguments) { - // Handle errors in some way. - + if let Err(e) = run_service(arguments) { + panic!("{:?}", e); } } #[cfg(target_os = "windows")] fn run_service(_arguments: Vec) -> Result<()> { - let mut stop = false; + let (tx, rx) = mpsc::channel(); let event_handler = move |control_event| -> ServiceControlHandlerResult { match control_event { ServiceControl::Stop => { // Handle stop event and return control back to the system. - stop = true; + let _ = tx.send(1); ServiceControlHandlerResult::NoError } // All services must accept Interrogate even if it's a no-op. @@ -134,7 +134,7 @@ fn run_service(_arguments: Vec) -> Result<()> { } }; if let Ok(system_handler) = service_control_handler::register("Scaphandre", event_handler) { - let mut next_status = ServiceStatus { + let next_status = ServiceStatus { // Should match the one from system service registry service_type: ServiceType::OWN_PROCESS, // The new state @@ -150,17 +150,31 @@ fn run_service(_arguments: Vec) -> Result<()> { // Unused for setting status process_id: None, }; - if stop { - next_status.current_state = ServiceState::StopPending; - next_status.exit_code = ServiceExitCode::Win32(0); - next_status.wait_hint = Duration::from_secs(1); - } + // next_status.current_state = ServiceState::StopPending; + // next_status.exit_code = ServiceExitCode::Win32(0); + // next_status.wait_hint = Duration::from_secs(1); // Tell the system that the service is running now if let Ok(_status_set) = system_handler.set_service_status(next_status) { - parse_cli_and_run_exporter(); + parse_cli_and_run_exporter(&rx); } else { panic!("Couldn't set Windows service status."); } + + let stop_status = ServiceStatus { + service_type: ServiceType::OWN_PROCESS, + current_state: ServiceState::Stopped, + controls_accepted: ServiceControlAccept::STOP, + exit_code: ServiceExitCode::Win32(0), + checkpoint: 0, + wait_hint: Duration::default(), + process_id: None + }; + + if let Ok(_status_set) = system_handler.set_service_status(stop_status) { + } else { + panic!("Couldn't set Windows service STOP status."); + } + } else { panic!("Couldn't get Windows system events handler."); } @@ -176,10 +190,12 @@ fn main() { } } - parse_cli_and_run_exporter(); + let (_, rx) = mpsc::channel(); + + parse_cli_and_run_exporter(&rx); } -fn parse_cli_and_run_exporter() { +fn parse_cli_and_run_exporter(channel: &Receiver) { let cli = Cli::parse(); loggerv::init_with_verbosity(cli.verbose.into()).expect("unable to initialize the logger"); @@ -189,7 +205,7 @@ fn parse_cli_and_run_exporter() { print_scaphandre_header(exporter.kind()); } - exporter.run(); + exporter.run(channel); } fn build_exporter(choice: ExporterChoice, sensor: &dyn Sensor) -> Box { From f1156046d2d32e97ac68db1c8d34a17e9a9e4453 Mon Sep 17 00:00:00 2001 From: bpetit Date: Wed, 16 Aug 2023 18:50:57 +0200 Subject: [PATCH 08/33] feat: enabling to stop the windows service --- src/exporters/prometheus.rs | 3 ++- src/exporters/riemann.rs | 2 +- src/main.rs | 5 ++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/exporters/prometheus.rs b/src/exporters/prometheus.rs index 3fe8a238..ad0e6150 100644 --- a/src/exporters/prometheus.rs +++ b/src/exporters/prometheus.rs @@ -18,6 +18,7 @@ use std::{ net::{IpAddr, Ipv4Addr, SocketAddr}, sync::{Arc, Mutex}, time::Duration, + sync::mpsc::Receiver }; /// Default ipv4/ipv6 address to expose the service is any @@ -72,7 +73,7 @@ impl PrometheusExporter { impl Exporter for PrometheusExporter { /// Starts an HTTP server to expose the metrics in Prometheus format. - fn run(&mut self, channel: Receiver) { + fn run(&mut self, channel: &Receiver) { info!( "{}: Starting Prometheus exporter", Utc::now().format("%Y-%m-%dT%H:%M:%S") diff --git a/src/exporters/riemann.rs b/src/exporters/riemann.rs index 5c36b2ca..94843e2e 100644 --- a/src/exporters/riemann.rs +++ b/src/exporters/riemann.rs @@ -169,7 +169,7 @@ impl RiemannExporter { impl Exporter for RiemannExporter { /// Entry point of the RiemannExporter. - fn run(&mut self, channel: Receiver) { + fn run(&mut self, channel: &Receiver) { info!( "{}: Starting Riemann exporter", Utc::now().format("%Y-%m-%dT%H:%M:%S") diff --git a/src/main.rs b/src/main.rs index 6c43193e..91fa1b87 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ use clap::{command, ArgAction, Parser, Subcommand}; use colored::Colorize; use scaphandre::{exporters, sensors::Sensor}; use std::sync::mpsc::{self, Receiver}; +use std::thread; #[cfg(target_os = "linux")] use scaphandre::sensors::powercap_rapl; @@ -155,7 +156,9 @@ fn run_service(_arguments: Vec) -> Result<()> { // next_status.wait_hint = Duration::from_secs(1); // Tell the system that the service is running now if let Ok(_status_set) = system_handler.set_service_status(next_status) { - parse_cli_and_run_exporter(&rx); + let handle = thread::spawn(move || { + parse_cli_and_run_exporter(&rx); + }); } else { panic!("Couldn't set Windows service status."); } From d3ab6ebfa0079569f5c05f3b22a6824686779de7 Mon Sep 17 00:00:00 2001 From: bpetit Date: Thu, 17 Aug 2023 16:13:51 +0200 Subject: [PATCH 09/33] feat: enabling to stop the windows service --- src/main.rs | 125 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 73 insertions(+), 52 deletions(-) diff --git a/src/main.rs b/src/main.rs index 91fa1b87..98539238 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,7 @@ use clap::{command, ArgAction, Parser, Subcommand}; use colored::Colorize; use scaphandre::{exporters, sensors::Sensor}; -use std::sync::mpsc::{self, Receiver}; +use std::sync::mpsc::{self, Receiver, Sender}; use std::thread; #[cfg(target_os = "linux")] @@ -114,19 +114,47 @@ enum ExporterChoice { #[cfg(target_os = "windows")] fn my_service_main(arguments: Vec) { - if let Err(e) = run_service(arguments) { - panic!("{:?}", e); - } -} + use std::thread::JoinHandle; + let graceful_period = 3; -#[cfg(target_os = "windows")] -fn run_service(_arguments: Vec) -> Result<()> { let (tx, rx) = mpsc::channel(); + let start_status = ServiceStatus { + service_type: ServiceType::OWN_PROCESS, // Should match the one from system service registry + current_state: ServiceState::Running, // The new state + controls_accepted: ServiceControlAccept::STOP, // Accept stop events when running + exit_code: ServiceExitCode::Win32(0), // Used to report an error when starting or stopping only, otherwise must be zero + checkpoint: 0, // Only used for pending states, otherwise must be zero + wait_hint: Duration::default(), // Only used for pending states, otherwise must be zero + process_id: None, // Unused for setting status + }; + let stop_status = ServiceStatus { + service_type: ServiceType::OWN_PROCESS, + current_state: ServiceState::Stopped, + controls_accepted: ServiceControlAccept::STOP, + exit_code: ServiceExitCode::Win32(0), + checkpoint: 0, + wait_hint: Duration::default(), + process_id: None + }; + let stoppending_status = ServiceStatus { + service_type: ServiceType::OWN_PROCESS, + current_state: ServiceState::StopPending, + controls_accepted: ServiceControlAccept::STOP, + exit_code: ServiceExitCode::Win32(0), + checkpoint: 0, + wait_hint: Duration::from_secs(graceful_period), + process_id: None + }; + + let mut thread_handle: Option> = None; + let mut stop = false; let event_handler = move |control_event| -> ServiceControlHandlerResult { + println!("Got service control event: {:?}", control_event); match control_event { ServiceControl::Stop => { // Handle stop event and return control back to the system. - let _ = tx.send(1); + stop = true; + let _ = &tx.send(1); ServiceControlHandlerResult::NoError } // All services must accept Interrogate even if it's a no-op. @@ -134,54 +162,47 @@ fn run_service(_arguments: Vec) -> Result<()> { _ => ServiceControlHandlerResult::NotImplemented, } }; - if let Ok(system_handler) = service_control_handler::register("Scaphandre", event_handler) { - let next_status = ServiceStatus { - // Should match the one from system service registry - service_type: ServiceType::OWN_PROCESS, - // The new state - current_state: ServiceState::Running, - // Accept stop events when running - controls_accepted: ServiceControlAccept::STOP, - // Used to report an error when starting or stopping only, otherwise must be zero - exit_code: ServiceExitCode::Win32(0), - // Only used for pending states, otherwise must be zero - checkpoint: 0, - // Only used for pending states, otherwise must be zero - wait_hint: Duration::default(), - // Unused for setting status - process_id: None, - }; - // next_status.current_state = ServiceState::StopPending; - // next_status.exit_code = ServiceExitCode::Win32(0); - // next_status.wait_hint = Duration::from_secs(1); - // Tell the system that the service is running now - if let Ok(_status_set) = system_handler.set_service_status(next_status) { - let handle = thread::spawn(move || { - parse_cli_and_run_exporter(&rx); - }); - } else { - panic!("Couldn't set Windows service status."); - } - let stop_status = ServiceStatus { - service_type: ServiceType::OWN_PROCESS, - current_state: ServiceState::Stopped, - controls_accepted: ServiceControlAccept::STOP, - exit_code: ServiceExitCode::Win32(0), - checkpoint: 0, - wait_hint: Duration::default(), - process_id: None - }; - - if let Ok(_status_set) = system_handler.set_service_status(stop_status) { - } else { - panic!("Couldn't set Windows service STOP status."); + if let Ok(system_handler) = service_control_handler::register("scaphandre", event_handler) { + // Tell the system that the service is running now and run it + match system_handler.set_service_status(start_status.clone()) { + Ok(status_set) => { + println!("Starting main thread, service status has been set: {:?}", status_set); + thread_handle = Some(thread::spawn(move || { parse_cli_and_run_exporter(&rx); })); + }, + Err(e) => { + panic!("Couldn't set Windows service status. Error: {:?}", e); + } + } + loop { + if stop { + // Wait for the thread to finnish, then end the current function + match system_handler.set_service_status(stoppending_status.clone()) { + Ok(status_set) => { + println!("Stop status has been set for service: {:?}", status_set); + if let Some(thr) = thread_handle { + if let Ok(_) = thr.join() { + match system_handler.set_service_status(stop_status.clone()) { + Ok(laststatus_set) => {println!("Scaphandre gracefully stopped: {:?}", laststatus_set);}, + Err(e) => {panic!("Could'nt set Stop status on scaphandre service: {:?}", e);} + } + } else { + panic!("Joining the thread failed."); + } + break; + } else { + panic!("Thread handle was not initialized."); + } + }, + Err(e) => { + panic!("Couldn't set Windows service status. Error: {:?}", e); + } + } + } } - } else { - panic!("Couldn't get Windows system events handler."); + panic!("Failed getting system_handle."); } - Ok(()) } fn main() { From 55b15a4235997acb6c97c241e235b61c0052db15 Mon Sep 17 00:00:00 2001 From: bpetit Date: Thu, 31 Aug 2023 12:02:06 +0200 Subject: [PATCH 10/33] test: testing package with new driver version --- .github/workflows/exe-release-prometheuspush.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/exe-release-prometheuspush.yml b/.github/workflows/exe-release-prometheuspush.yml index 650bfacb..17750501 100644 --- a/.github/workflows/exe-release-prometheuspush.yml +++ b/.github/workflows/exe-release-prometheuspush.yml @@ -12,7 +12,7 @@ on: branches: [ '336-proper-handling-of-windows-service-management' ] env: - WRD_VERSION: v0.0.2 + WRD_VERSION: experimental-multi-socket-1 WRD_BASE_URL: https://github.com/hubblo-org/windows-rapl-driver/releases/download jobs: From 84d4c36874e3e634b42c227c3ac09c41830a7d0e Mon Sep 17 00:00:00 2001 From: bpetit Date: Thu, 31 Aug 2023 12:04:36 +0200 Subject: [PATCH 11/33] test: testing package with new driver version --- .github/workflows/exe-release-prometheuspush.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/exe-release-prometheuspush.yml b/.github/workflows/exe-release-prometheuspush.yml index 17750501..655ede23 100644 --- a/.github/workflows/exe-release-prometheuspush.yml +++ b/.github/workflows/exe-release-prometheuspush.yml @@ -12,7 +12,7 @@ on: branches: [ '336-proper-handling-of-windows-service-management' ] env: - WRD_VERSION: experimental-multi-socket-1 + WRD_VERSION: "experimental-multi-socket-1" WRD_BASE_URL: https://github.com/hubblo-org/windows-rapl-driver/releases/download jobs: From a3ca569639c23d84cb44c3a595b6c469b2915043 Mon Sep 17 00:00:00 2001 From: bpetit Date: Thu, 31 Aug 2023 12:14:08 +0200 Subject: [PATCH 12/33] test: testing package with new driver version --- .github/workflows/exe-release-prometheuspush.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/exe-release-prometheuspush.yml b/.github/workflows/exe-release-prometheuspush.yml index 655ede23..07772b47 100644 --- a/.github/workflows/exe-release-prometheuspush.yml +++ b/.github/workflows/exe-release-prometheuspush.yml @@ -12,7 +12,7 @@ on: branches: [ '336-proper-handling-of-windows-service-management' ] env: - WRD_VERSION: "experimental-multi-socket-1" + WRD_VERSION: experimental\-multi\-socket\-1 WRD_BASE_URL: https://github.com/hubblo-org/windows-rapl-driver/releases/download jobs: From e6acf6c65d16caee2c1d62a8bec5f319abeb5025 Mon Sep 17 00:00:00 2001 From: bpetit Date: Thu, 31 Aug 2023 12:19:26 +0200 Subject: [PATCH 13/33] test: testing package with new driver version --- .github/workflows/exe-release-prometheuspush.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/exe-release-prometheuspush.yml b/.github/workflows/exe-release-prometheuspush.yml index 07772b47..27bc3be0 100644 --- a/.github/workflows/exe-release-prometheuspush.yml +++ b/.github/workflows/exe-release-prometheuspush.yml @@ -32,8 +32,10 @@ jobs: ls "C:\Program Files (x86)\Inno Setup 6\" - name: Get windows-rapl-driver run: | + $dest = "DriverLoader.exe" $url = "${{ env.WRD_BASE_URL }}/${{ env.WRD_VERSION }}/DriverLoader.exe" + echo ($url -replace '"', "") Invoke-WebRequest -Uri ($url -replace '"', "") -OutFile $dest $dest = "ScaphandreDrv.cat" $url = "${{ env.WRD_BASE_URL }}/${{ env.WRD_VERSION }}/ScaphandreDrv.cat" From 9d2651f9d4a2c72805c847bba6fc87a0ec348c45 Mon Sep 17 00:00:00 2001 From: bpetit Date: Thu, 31 Aug 2023 12:21:05 +0200 Subject: [PATCH 14/33] test: testing package with new driver version --- .github/workflows/exe-release-prometheuspush.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/exe-release-prometheuspush.yml b/.github/workflows/exe-release-prometheuspush.yml index 27bc3be0..6afea088 100644 --- a/.github/workflows/exe-release-prometheuspush.yml +++ b/.github/workflows/exe-release-prometheuspush.yml @@ -12,7 +12,7 @@ on: branches: [ '336-proper-handling-of-windows-service-management' ] env: - WRD_VERSION: experimental\-multi\-socket\-1 + WRD_VERSION: experimental-multi-socket-1 WRD_BASE_URL: https://github.com/hubblo-org/windows-rapl-driver/releases/download jobs: From b2fd1b034a985a72360744bc64e96e5146fad925 Mon Sep 17 00:00:00 2001 From: bpetit Date: Thu, 31 Aug 2023 12:27:12 +0200 Subject: [PATCH 15/33] test: testing package with new driver version --- .github/workflows/exe-release-prometheuspush.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/exe-release-prometheuspush.yml b/.github/workflows/exe-release-prometheuspush.yml index 6afea088..a4ead442 100644 --- a/.github/workflows/exe-release-prometheuspush.yml +++ b/.github/workflows/exe-release-prometheuspush.yml @@ -31,8 +31,8 @@ jobs: & "D:\a\scaphandre\scaphandre\$dest" /verysilent /suppressmsgbox ls "C:\Program Files (x86)\Inno Setup 6\" - name: Get windows-rapl-driver + shell: pwsh run: | - $dest = "DriverLoader.exe" $url = "${{ env.WRD_BASE_URL }}/${{ env.WRD_VERSION }}/DriverLoader.exe" echo ($url -replace '"', "") From 53dcb339e2009cee97be524c1e20dc56e5b71ff8 Mon Sep 17 00:00:00 2001 From: bpetit Date: Thu, 31 Aug 2023 12:47:17 +0200 Subject: [PATCH 16/33] test: testing package with new driver version --- .github/workflows/exe-release-prometheuspush.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/exe-release-prometheuspush.yml b/.github/workflows/exe-release-prometheuspush.yml index a4ead442..aded447c 100644 --- a/.github/workflows/exe-release-prometheuspush.yml +++ b/.github/workflows/exe-release-prometheuspush.yml @@ -12,7 +12,7 @@ on: branches: [ '336-proper-handling-of-windows-service-management' ] env: - WRD_VERSION: experimental-multi-socket-1 + WRD_VERSION: v0.0.3 WRD_BASE_URL: https://github.com/hubblo-org/windows-rapl-driver/releases/download jobs: From b19e43efdb47dd9771fce411cee96f79967fe8ec Mon Sep 17 00:00:00 2001 From: bpetit Date: Thu, 31 Aug 2023 18:06:17 +0200 Subject: [PATCH 17/33] build: fixed is script for local dev --- packaging/windows/dev_installer.iss | 14 ++++---- packaging/windows/register_log_source.ps1 | 40 +++++++++++++++++++++++ 2 files changed, 47 insertions(+), 7 deletions(-) create mode 100644 packaging/windows/register_log_source.ps1 diff --git a/packaging/windows/dev_installer.iss b/packaging/windows/dev_installer.iss index fb06f118..710174d8 100644 --- a/packaging/windows/dev_installer.iss +++ b/packaging/windows/dev_installer.iss @@ -40,13 +40,13 @@ Name: "english"; MessagesFile: "compiler:Default.isl" [Files] Source: "{#MyAppSourceFolder}\target\release\{#MyAppExeName}"; DestDir: "{app}"; Flags: ignoreversion Source: "{#RaplDriverSourceFolder}\x64\Release\DriverLoader.exe"; DestDir: "{app}"; Flags: ignoreversion -Source: "{#RaplDriverSourceFolder}\ScaphandreDrv\ScaphandreDrv.inf"; DestDir: "{app}"; Flags: ignoreversion -; Source: "{#RaplDriverSourceFolder}\ScaphandreDrv\ScaphandreDrv.sys"; DestDir: "{#SystemFolder}"; -; Source: "{#RaplDriverSourceFolder}\ScaphandreDrv\ScaphandreDrv.sys"; DestDir: "{#System64Folder}"; -Source: "{#RaplDriverSourceFolder}\ScaphandreDrv\ScaphandreDrv.sys"; DestDir: "{app}"; -Source: "{#RaplDriverSourceFolder}\ScaphandreDrv\ScaphandreDrv.cat"; DestDir: "{app}"; -; Source: "{#RaplDriverSourceFolder}\ScaphandreDrv\ScaphandreDrv.cat"; DestDir: "{#SystemFolder}"; -; Source: "{#RaplDriverSourceFolder}\ScaphandreDrv\ScaphandreDrv.cat"; DestDir: "{#System64Folder}"; +Source: "{#RaplDriverSourceFolder}\x64\Release\ScaphandreDrv\ScaphandreDrv.inf"; DestDir: "{app}"; Flags: ignoreversion +; Source: "{#RaplDriverSourceFolder}\x64\Release\ScaphandreDrv\ScaphandreDrv.sys"; DestDir: "{#SystemFolder}"; +; Source: "{#RaplDriverSourceFolder}\x64\Release\ScaphandreDrv\ScaphandreDrv.sys"; DestDir: "{#System64Folder}"; +Source: "{#RaplDriverSourceFolder}\x64\Release\ScaphandreDrv\ScaphandreDrv.sys"; DestDir: "{app}"; +Source: "{#RaplDriverSourceFolder}\x64\Release\ScaphandreDrv\ScaphandreDrv.cat"; DestDir: "{app}"; +; Source: "{#RaplDriverSourceFolder}\x64\Release\ScaphandreDrv\ScaphandreDrv.cat"; DestDir: "{#SystemFolder}"; +; Source: "{#RaplDriverSourceFolder}\x64\Release\ScaphandreDrv\ScaphandreDrv.cat"; DestDir: "{#System64Folder}"; Source: "C:\Program Files (x86)\Windows Kits\10\Tools\10.0.22621.0\x64\devcon.exe"; DestDir: "{app}"; Flags: ignoreversion Source: "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64\certmgr.exe"; DestDir: "{app}"; Flags: ignoreversion Source: "{#MyAppSourceFolder}\README.md"; DestDir: "{app}"; Flags: ignoreversion diff --git a/packaging/windows/register_log_source.ps1 b/packaging/windows/register_log_source.ps1 new file mode 100644 index 00000000..e6d7283b --- /dev/null +++ b/packaging/windows/register_log_source.ps1 @@ -0,0 +1,40 @@ +# https://github.com/dansmith +# +$source = "scaphandre" + + +$wid=[System.Security.Principal.WindowsIdentity]::GetCurrent() +$prp=new-object System.Security.Principal.WindowsPrincipal($wid) +$adm=[System.Security.Principal.WindowsBuiltInRole]::Administrator +$IsAdmin=$prp.IsInRole($adm) + +if($IsAdmin -eq $false) +{ + [System.Reflection.Assembly]::LoadWithPartialName(“System.Windows.Forms”) + [Windows.Forms.MessageBox]::Show(“Please run this as an Administrator”, + “Not Administrator”, + [Windows.Forms.MessageBoxButtons]::OK, + [Windows.Forms.MessageBoxIcon]::Information) + exit +} + + +if ([System.Diagnostics.EventLog]::SourceExists($source) -eq $false) +{ + [System.Diagnostics.EventLog]::CreateEventSource($source, "Application") + + [System.Reflection.Assembly]::LoadWithPartialName(“System.Windows.Forms”) + [Windows.Forms.MessageBox]::Show(“Event log created successfully”, + “Complete”, + [Windows.Forms.MessageBoxButtons]::OK, + [Windows.Forms.MessageBoxIcon]::Information) +} +else +{ + [System.Reflection.Assembly]::LoadWithPartialName(“System.Windows.Forms”) + [Windows.Forms.MessageBox]::Show(“Event log already exists”, + “Complete”, + [Windows.Forms.MessageBoxButtons]::OK, + [Windows.Forms.MessageBoxIcon]::Information) + +} \ No newline at end of file From e7b91a7a62e08baee81aaa8a78669264b623dab6 Mon Sep 17 00:00:00 2001 From: bpetit Date: Thu, 31 Aug 2023 18:06:50 +0200 Subject: [PATCH 18/33] feat: enabled manually setting number of cpu sockets for windows/msrrapl sensor --- src/exporters/mod.rs | 4 +- src/exporters/stdout.rs | 1 + src/lib.rs | 4 +- src/main.rs | 11 ++- src/sensors/mod.rs | 93 ++++++++++++-------- src/sensors/msr_rapl.rs | 185 ++++++++++++++++++++++++---------------- src/sensors/utils.rs | 7 +- 7 files changed, 185 insertions(+), 120 deletions(-) diff --git a/src/exporters/mod.rs b/src/exporters/mod.rs index 3ece8fcb..d458c475 100644 --- a/src/exporters/mod.rs +++ b/src/exporters/mod.rs @@ -900,7 +900,7 @@ impl MetricGenerator { /// Generate process metrics. fn gen_process_metrics(&mut self) { - debug!("In gen_process_metrics."); + trace!("In gen_process_metrics."); #[cfg(feature = "containers")] if self.watch_containers { let now = current_system_time_since_epoch().as_secs().to_string(); @@ -1043,7 +1043,7 @@ impl MetricGenerator { Utc::now().format("%Y-%m-%dT%H:%M:%S") ); self.gen_process_metrics(); - debug!("self_metrics: {:#?}", self.data); + trace!("self_metrics: {:#?}", self.data); } pub fn pop_metrics(&mut self) -> Vec { diff --git a/src/exporters/stdout.rs b/src/exporters/stdout.rs index 1132c99f..35f880c5 100644 --- a/src/exporters/stdout.rs +++ b/src/exporters/stdout.rs @@ -134,6 +134,7 @@ impl StdoutExporter { .iter() .filter(|x| x.name == "scaph_socket_power_microwatts") { + warn!("✅ Found socket power metric !"); let power = format!("{}", s.metric_value).parse::().unwrap() / 1000000.0; let mut power_str = String::from("----"); if power > 0.0 { diff --git a/src/lib.rs b/src/lib.rs index 60b65c46..352cbdc8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,7 +27,9 @@ pub fn get_default_sensor() -> impl sensors::Sensor { ); #[cfg(target_os = "windows")] - return msr_rapl::MsrRAPLSensor::new(); + return msr_rapl::MsrRAPLSensor::new( + 1 + ); } fn current_system_time_since_epoch() -> Duration { diff --git a/src/main.rs b/src/main.rs index 98539238..899f0463 100644 --- a/src/main.rs +++ b/src/main.rs @@ -74,6 +74,11 @@ struct Cli { #[cfg(target_os = "linux")] #[arg(long, default_value_t = powercap_rapl::DEFAULT_BUFFER_PER_SOCKET_MAX_KBYTES)] sensor_buffer_per_socket_max_kb: u16, + + /// Number of physical CPU packages/sockets enabled on the host + #[cfg(target_os = "windows")] + #[arg(long, default_value_t = 1)] + sensor_nb_cpu_sockets: u16, } /// Defines the possible subcommands, one per exporter. @@ -280,7 +285,11 @@ fn build_sensor(cli: &Cli) -> impl Sensor { }; #[cfg(target_os = "windows")] - let msr_sensor_win = msr_rapl::MsrRAPLSensor::new; + let msr_sensor_win = || { + msr_rapl::MsrRAPLSensor::new( + cli.sensor_nb_cpu_sockets + ) + }; match cli.sensor.as_deref() { Some("powercap_rapl") => { diff --git a/src/sensors/mod.rs b/src/sensors/mod.rs index c0055c2a..47691eea 100644 --- a/src/sensors/mod.rs +++ b/src/sensors/mod.rs @@ -278,30 +278,48 @@ impl Topology { /// to appropriate CPUSocket instance from self.sockets pub fn add_cpu_cores(&mut self) { if let Some(mut cores) = Topology::generate_cpu_cores() { - while let Some(c) = cores.pop() { - let socket_id = &c - .attributes - .get("physical id") - .unwrap() - .parse::() - .unwrap(); - let socket_match = self.sockets.iter_mut().find(|x| &x.id == socket_id); - - //In VMs there might be a missmatch betwen Sockets and Cores - see Issue#133 as a first fix we just map all cores that can't be mapped to the first - let socket = match socket_match { - Some(x) => x, - None =>self.sockets.first_mut().expect("Trick: if you are running on a vm, do not forget to use --vm parameter invoking scaphandre at the command line") - }; - - if socket_id == &socket.id { - socket.add_cpu_core(c); - } else { - socket.add_cpu_core(c); - warn!("coud't not match core to socket - mapping to first socket instead - if you are not using --vm there is something wrong") + #[cfg(target_os = "linux")] { + while let Some(c) = cores.pop() { + let socket_id = &c + .attributes + .get("physical id") + .unwrap() + .parse::() + .unwrap(); + let socket_match = self.sockets.iter_mut().find(|x| &x.id == socket_id); + + //In VMs there might be a missmatch betwen Sockets and Cores - see Issue#133 as a first fix we just map all cores that can't be mapped to the first + let socket = match socket_match { + Some(x) => x, + None =>self.sockets.first_mut().expect("Trick: if you are running on a vm, do not forget to use --vm parameter invoking scaphandre at the command line") + }; + + if socket_id == &socket.id { + socket.add_cpu_core(c); + } else { + socket.add_cpu_core(c); + warn!("coud't not match core to socket - mapping to first socket instead - if you are not using --vm there is something wrong") + } + } + } + #[cfg(target_os = "windows")] + { + let nb_cores_per_socket = &cores.len() / &self.sockets.len(); + for s in self.sockets.iter_mut() { + for c in 1..nb_cores_per_socket { + match cores.pop() { + Some(core) => { + s.add_cpu_core(core); + }, + None => { + error!("Uneven number of CPU cores !"); + } + } + } } } } else { - warn!("Couldn't retrieve any CPU Core from the topology. (generate_cpu_cores)"); + panic!("Couldn't retrieve any CPU Core from the topology. (generate_cpu_cores)"); } } @@ -1109,16 +1127,17 @@ impl CPUSocket { steal: Some(0), }; for c in &self.cpu_cores { - let c_stats = c.read_stats().unwrap(); - stats.user += c_stats.user; - stats.nice += c_stats.nice; - stats.system += c_stats.system; - stats.idle += c_stats.idle; - stats.iowait = - Some(stats.iowait.unwrap_or_default() + c_stats.iowait.unwrap_or_default()); - stats.irq = Some(stats.irq.unwrap_or_default() + c_stats.irq.unwrap_or_default()); - stats.softirq = - Some(stats.softirq.unwrap_or_default() + c_stats.softirq.unwrap_or_default()); + if let Some(c_stats) = c.read_stats() { + stats.user += c_stats.user; + stats.nice += c_stats.nice; + stats.system += c_stats.system; + stats.idle += c_stats.idle; + stats.iowait = + Some(stats.iowait.unwrap_or_default() + c_stats.iowait.unwrap_or_default()); + stats.irq = Some(stats.irq.unwrap_or_default() + c_stats.irq.unwrap_or_default()); + stats.softirq = + Some(stats.softirq.unwrap_or_default() + c_stats.softirq.unwrap_or_default()); + } } Some(stats) } @@ -1184,9 +1203,9 @@ impl CPUSocket { &last_record.value, &previous_record.value ); let last_rec_val = last_record.value.trim(); - debug!("socket : l1049 : trying to parse {} as u64", last_rec_val); + debug!("socket : l1187 : trying to parse {} as u64", last_rec_val); let prev_rec_val = previous_record.value.trim(); - debug!("socket : l1051 : trying to parse {} as u64", prev_rec_val); + debug!("socket : l1189 : trying to parse {} as u64", prev_rec_val); if let (Ok(last_microjoules), Ok(previous_microjoules)) = (last_rec_val.parse::(), prev_rec_val.parse::()) { @@ -1210,7 +1229,7 @@ impl CPUSocket { )); } } else { - debug!("Not enough records for socket"); + warn!("Not enough records for socket"); } None } @@ -1532,7 +1551,7 @@ mod tests { #[cfg(target_os = "linux")] let sensor = powercap_rapl::PowercapRAPLSensor::new(8, 8, false); #[cfg(not(target_os = "linux"))] - let sensor = msr_rapl::MsrRAPLSensor::new(); + let sensor = msr_rapl::MsrRAPLSensor::new(1); let topo = (*sensor.get_topology()).unwrap(); println!("{:?}", topo.read_stats()); } @@ -1542,7 +1561,7 @@ mod tests { #[cfg(target_os = "linux")] let sensor = powercap_rapl::PowercapRAPLSensor::new(8, 8, false); #[cfg(not(target_os = "linux"))] - let sensor = msr_rapl::MsrRAPLSensor::new(); + let sensor = msr_rapl::MsrRAPLSensor::new(1); let mut topo = (*sensor.get_topology()).unwrap(); for s in topo.get_sockets() { for c in s.get_cores() { @@ -1556,7 +1575,7 @@ mod tests { #[cfg(target_os = "linux")] let sensor = powercap_rapl::PowercapRAPLSensor::new(8, 8, false); #[cfg(not(target_os = "linux"))] - let sensor = msr_rapl::MsrRAPLSensor::new(); + let sensor = msr_rapl::MsrRAPLSensor::new(1); let mut topo = (*sensor.get_topology()).unwrap(); for s in topo.get_sockets() { println!("{:?}", s.read_stats()); diff --git a/src/sensors/msr_rapl.rs b/src/sensors/msr_rapl.rs index b06095f7..0c186f16 100644 --- a/src/sensors/msr_rapl.rs +++ b/src/sensors/msr_rapl.rs @@ -12,18 +12,27 @@ use windows::Win32::Storage::FileSystem::{ use windows::Win32::System::Ioctl::{FILE_DEVICE_UNKNOWN, METHOD_BUFFERED}; use windows::Win32::System::IO::DeviceIoControl; -const MSR_RAPL_POWER_UNIT: u16 = 0x606; // - //const MSR_PKG_POWER_LIMIT: u16 = 0x610; // PKG RAPL Power Limit Control (R/W) See Section 14.7.3, Package RAPL Domain. -const MSR_PKG_ENERGY_STATUS: u16 = 0x611; -//const MSR_PKG_POWER_INFO: u16 = 0x614; -//const MSR_DRAM_ENERGY_STATUS: u16 = 0x619; -//const MSR_PP0_ENERGY_STATUS: u16 = 0x639; //PP0 Energy Status (R/O) See Section 14.7.4, PP0/PP1 RAPL Domains. -//const MSR_PP0_PERF_STATUS: u16 = 0x63b; // PP0 Performance Throttling Status (R/O) See Section 14.7.4, PP0/PP1 RAPL Domains. -//const MSR_PP0_POLICY: u16 = 0x63a; //PP0 Balance Policy (R/W) See Section 14.7.4, PP0/PP1 RAPL Domains. -//const MSR_PP0_POWER_LIMIT: u16 = 0x638; // PP0 RAPL Power Limit Control (R/W) See Section 14.7.4, PP0/PP1 RAPL Domains. -//const MSR_PP1_ENERGY_STATUS: u16 = 0x641; // PP1 Energy Status (R/O) See Section 14.7.4, PP0/PP1 RAPL Domains. -//const MSR_PP1_POLICY: u16 = 0x642; // PP1 Balance Policy (R/W) See Section 14.7.4, PP0/PP1 RAPL Domains. -//const MSR_PP1_POWER_LIMIT: u16 = 0x640; // PP1 RAPL Power Limit Control (R/W) See Section 14.7.4, PP0/PP1 RAPL Domains. +// Intel RAPL MSRs +const MSR_RAPL_POWER_UNIT: u32 = 0x606; // +const MSR_PKG_POWER_LIMIT: u32 = 0x610; // PKG RAPL Power Limit Control (R/W) See Section 14.7.3, Package RAPL Domain. +const MSR_PKG_ENERGY_STATUS: u32 = 0x611; +const MSR_PKG_POWER_INFO: u32 = 0x614; +const MSR_DRAM_ENERGY_STATUS: u32 = 0x619; +const MSR_PP0_ENERGY_STATUS: u32 = 0x639; //PP0 Energy Status (R/O) See Section 14.7.4, PP0/PP1 RAPL Domains. +const MSR_PP0_PERF_STATUS: u32 = 0x63b; // PP0 Performance Throttling Status (R/O) See Section 14.7.4, PP0/PP1 RAPL Domains. +const MSR_PP0_POLICY: u32 = 0x63a; //PP0 Balance Policy (R/W) See Section 14.7.4, PP0/PP1 RAPL Domains. +const MSR_PP0_POWER_LIMIT: u32 = 0x638; // PP0 RAPL Power Limit Control (R/W) See Section 14.7.4, PP0/PP1 RAPL Domains. +const MSR_PP1_ENERGY_STATUS: u32 = 0x641; // PP1 Energy Status (R/O) See Section 14.7.4, PP0/PP1 RAPL Domains. +const MSR_PP1_POLICY: u32 = 0x642; // PP1 Balance Policy (R/W) See Section 14.7.4, PP0/PP1 RAPL Domains. +const MSR_PP1_POWER_LIMIT: u32 = 0x640; // PP1 RAPL Power Limit Control (R/W) See Section 14.7.4, PP0/PP1 RAPL Domains. +const MSR_PLATFORM_ENERGY_STATUS: u32 = 0x0000064d; +const MSR_PLATFORM_POWER_LIMIT: u32 = 0x0000065c ; + +// AMD RAPL MSRs +const MSR_AMD_RAPL_POWER_UNIT: u32 = 0xc0010299; +const MSR_AMD_CORE_ENERGY_STATUS: u32 = 0xc001029a; +const MSR_AMD_PKG_ENERGY_STATUS: u32 = 0xc001029b; + unsafe fn ctl_code(device_type: u32, request_code: u32, method: u32, access: u32) -> u32 { ((device_type) << 16) | ((access) << 14) | ((request_code) << 2) | (method) @@ -67,16 +76,17 @@ pub struct MsrRAPLSensor { power_unit: f64, energy_unit: f64, time_unit: f64, + nb_cpu_sockets: u16 } impl Default for MsrRAPLSensor { fn default() -> Self { - Self::new() + Self::new(1) } } impl MsrRAPLSensor { - pub fn new() -> MsrRAPLSensor { + pub fn new(nb_cpu_sockets: u16) -> MsrRAPLSensor { let driver_name = "\\\\.\\ScaphandreDriver"; let mut power_unit: f64 = 1.0; @@ -114,6 +124,7 @@ impl MsrRAPLSensor { energy_unit, power_unit, time_unit, + nb_cpu_sockets } } @@ -159,18 +170,30 @@ impl MsrRAPLSensor { impl RecordReader for Topology { fn read_record(&self) -> Result> { - let randval: i32 = rand::random(); + let mut res: u64 = 0; + warn!("Topology: I have {} sockets", self.sockets.len()); + for s in &self.sockets { + match s.read_record() { + Ok(rec) => { + warn!("rec: {:?}", rec); + res = res + rec.value.parse::()?; + }, + Err(e) => { + error!("Failed to get socket record : {:?}", e); + } + } + } Ok(Record { timestamp: current_system_time_since_epoch(), unit: super::units::Unit::MicroJoule, - value: format!("{}", randval), + value: res.to_string(), }) } } unsafe fn send_request( device: HANDLE, - request_code: u16, + request_code: u32, request: *const u64, request_length: usize, reply: *mut u64, @@ -180,7 +203,7 @@ unsafe fn send_request( let len_ptr: *mut u32 = &mut len; if DeviceIoControl( - device, // envoi 8 octet et je recoi 8 octet + device, // send 8 bytes, receive 8 bytes crate::sensors::msr_rapl::ctl_code( FILE_DEVICE_UNKNOWN, request_code as _, @@ -213,61 +236,65 @@ impl RecordReader for CPUSocket { fn read_record(&self) -> Result> { unsafe { let driver_name = self.sensor_data.get("DRIVER_NAME").unwrap(); - if let Ok(device) = get_handle(driver_name) { - let mut msr_result: u64 = 0; - let ptr_result = &mut msr_result as *mut u64; - let mut src = MSR_RAPL_POWER_UNIT as u64; - let ptr = &src as *const u64; - - src = MSR_PKG_ENERGY_STATUS as u64; - trace!("src: {:x}", src); - trace!("src: {:b}", src); - - trace!("*ptr: {}", *ptr); - trace!("&request: {:?} ptr (as *const u8): {:?}", &src, ptr); - - if let Ok(res) = send_request( - device, - MSR_PKG_ENERGY_STATUS, - // nouvelle version à integrer : request_code est ignoré et request doit contenir - // request_code sous forme d'un char * - ptr, - 8, - ptr_result, - size_of::(), - ) { - debug!("{}", res); - - close_handle(device); - - let energy_unit = self - .sensor_data - .get("ENERGY_UNIT") - .unwrap() - .parse::() - .unwrap(); - - Ok(Record { - timestamp: current_system_time_since_epoch(), - unit: super::units::Unit::MicroJoule, - value: MsrRAPLSensor::extract_rapl_current_power(msr_result, energy_unit), - }) - } else { - error!("Failed to get data from send_request."); - close_handle(device); - Ok(Record { - timestamp: current_system_time_since_epoch(), - unit: super::units::Unit::MicroJoule, - value: String::from("0"), - }) + match get_handle(driver_name) { + Ok(device) => { + let mut msr_result: u64 = 0; + let ptr_result = &mut msr_result as *mut u64; + // get core numbers tied to the socket + let src = MSR_PKG_ENERGY_STATUS as u64; + let ptr = &src as *const u64; + + trace!("src: {:x}", src); + trace!("src: {:b}", src); + + trace!("*ptr: {}", *ptr); + trace!("&request: {:?} ptr (as *const u8): {:?}", &src, ptr); + + match send_request( + device, + MSR_PKG_ENERGY_STATUS, + // nouvelle version à integrer : request_code est ignoré et request doit contenir + // request_code sous forme d'un char * + ptr, + 8, + ptr_result, + size_of::(), + ) { + Ok(res) => { + debug!("{}", res); + + close_handle(device); + + let energy_unit = self + .sensor_data + .get("ENERGY_UNIT") + .unwrap() + .parse::() + .unwrap(); + + let current_power = MsrRAPLSensor::extract_rapl_current_power(msr_result, energy_unit); + warn!("current_power: {}", current_power); + + Ok(Record { + timestamp: current_system_time_since_epoch(), + unit: super::units::Unit::MicroJoule, + value: current_power, + }) + }, + Err(e) => { + error!("Failed to get data from send_request: {:?}", e); + close_handle(device); + Ok(Record { + timestamp: current_system_time_since_epoch(), + unit: super::units::Unit::MicroJoule, + value: String::from("0"), + }) + } + } + }, + Err(e) => { + panic!("Couldn't get driver handle : {:?}", e); } - } else { - error!("Couldn't get handle."); - Ok(Record { - timestamp: current_system_time_since_epoch(), - unit: super::units::Unit::MicroJoule, - value: String::from("0"), - }) } } } @@ -293,9 +320,19 @@ impl Sensor for MsrRAPLSensor { let mut topology = Topology::new(sensor_data.clone()); let mut sys = System::new_all(); sys.refresh_all(); - let i = 0; + + warn!("Got {} sockets CPU", self.nb_cpu_sockets); + //TODO fix that to actually count the number of sockets - topology.safe_add_socket(i, vec![], vec![], String::from(""), 4, sensor_data.clone()); + let mut i = 0; + let logical_cpus = sys.cpus() ; + + while i < self.nb_cpu_sockets { + topology.safe_add_socket(i, vec![], vec![], String::from(""), 4, sensor_data.clone()); + i = i + 1; + } + + topology.add_cpu_cores(); Ok(topology) } diff --git a/src/sensors/utils.rs b/src/sensors/utils.rs index 1827d054..ceac1cf2 100644 --- a/src/sensors/utils.rs +++ b/src/sensors/utils.rs @@ -387,7 +387,7 @@ impl ProcessTracker { /// Returns all vectors of process records linked to a running, sleeping, waiting or zombie process. /// (Not terminated) pub fn get_alive_processes(&self) -> Vec<&Vec> { - debug!("In get alive processes."); + trace!("In get alive processes."); let mut res = vec![]; for p in self.procs.iter() { //#[cfg(target_os = "linux")] @@ -412,7 +412,7 @@ impl ProcessTracker { } } } - debug!("End of get alive processes."); + trace!("End of get alive processes."); res } @@ -632,7 +632,6 @@ impl ProcessTracker { if result.next().is_some() { panic!("Found two vectors of processes with the same id, maintainers should fix this."); } - debug!("End of get process name."); process.get(0).unwrap().process.comm.clone() } @@ -652,11 +651,9 @@ impl ProcessTracker { cmdline.push_str(&cmdline_vec.remove(0)); } } - debug!("End of get process cmdline."); return Some(cmdline); } } - debug!("End of get process cmdline."); None } From 33d4dd63742719070b1876f7c576d1b1c83f74f1 Mon Sep 17 00:00:00 2001 From: bpetit Date: Mon, 4 Sep 2023 15:52:15 +0200 Subject: [PATCH 19/33] feat: early verboe version with multi-socket fixed --- src/exporters/stdout.rs | 2 ++ src/sensors/msr_rapl.rs | 18 ++++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/exporters/stdout.rs b/src/exporters/stdout.rs index 35f880c5..9044a46c 100644 --- a/src/exporters/stdout.rs +++ b/src/exporters/stdout.rs @@ -178,6 +178,8 @@ impl StdoutExporter { } } println!("{to_print}\n"); + } else { + println!("{to_print} Could'nt get per-domain metrics.\n"); } } diff --git a/src/sensors/msr_rapl.rs b/src/sensors/msr_rapl.rs index 0c186f16..fa92cfe3 100644 --- a/src/sensors/msr_rapl.rs +++ b/src/sensors/msr_rapl.rs @@ -240,14 +240,24 @@ impl RecordReader for CPUSocket { Ok(device) => { let mut msr_result: u64 = 0; let ptr_result = &mut msr_result as *mut u64; + let mut core_id: u32 = 0; // get core numbers tied to the socket - let src = MSR_PKG_ENERGY_STATUS as u64; + if let Some(core) = self.cpu_cores.first() { + core_id = core.id as u32; + } else { + panic!("Couldn't get a CPUCore in socket {}", self.id); + } + warn!("msr: {:x}", (MSR_PKG_ENERGY_STATUS as u64)); + warn!("msr: {:b}", (MSR_PKG_ENERGY_STATUS as u64)); + warn!("core_id: {:x} {:b}", (core_id as u64), (core_id as u64)); + warn!("core_id: {:b}", ((core_id as u64) << 54)); + let src = ((core_id as u64) << 32) | (MSR_PKG_ENERGY_STATUS as u64); let ptr = &src as *const u64; - trace!("src: {:x}", src); - trace!("src: {:b}", src); + warn!("src: {:x}", src); + warn!("src: {:b}", src); - trace!("*ptr: {}", *ptr); + warn!("*ptr: {}", *ptr); trace!("&request: {:?} ptr (as *const u8): {:?}", &src, ptr); match send_request( From 8a2149e17aa90fe57fe677e6694ecc24e6ab9c36 Mon Sep 17 00:00:00 2001 From: bpetit Date: Thu, 7 Sep 2023 18:50:58 +0200 Subject: [PATCH 20/33] feat: added rapl core+uncore+domains to topology --- Cargo.lock | 22 ++++ Cargo.toml | 2 + src/sensors/mod.rs | 6 +- src/sensors/msr_rapl.rs | 255 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 277 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3821d73d..a971b21e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -261,6 +261,17 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +[[package]] +name = "core_affinity" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622892f5635ce1fc38c8f16dfc938553ed64af482edb5e150bf4caedbfcb2304" +dependencies = [ + "libc", + "num_cpus", + "winapi", +] + [[package]] name = "crc32fast" version = "1.3.2" @@ -1303,6 +1314,15 @@ dependencies = [ "rand_core", ] +[[package]] +name = "raw-cpuid" +version = "10.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +dependencies = [ + "bitflags", +] + [[package]] name = "rayon" version = "1.7.0" @@ -1465,6 +1485,7 @@ dependencies = [ "chrono", "clap", "colored", + "core_affinity", "docker-sync", "hostname", "hyper", @@ -1476,6 +1497,7 @@ dependencies = [ "procfs", "protobuf", "rand", + "raw-cpuid", "regex", "riemann_client", "serde", diff --git a/Cargo.toml b/Cargo.toml index cf0319e9..e7478467 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,8 @@ procfs = { version = "0.15.0" } [target.'cfg(target_os="windows")'.dependencies] windows = { version = "0.27.0", features = ["alloc","Win32_Storage_FileSystem","Win32_Foundation","Win32_Security","Win32_System_IO","Win32_System_Ioctl"]} windows-service = { version = "0.6.0" } +raw-cpuid = { version = "10.5.0" } +core_affinity = { version = "0.8.1"} [features] default = ["prometheus", "riemann", "warpten", "json", "containers", "prometheuspush"] diff --git a/src/sensors/mod.rs b/src/sensors/mod.rs index 47691eea..a1f08211 100644 --- a/src/sensors/mod.rs +++ b/src/sensors/mod.rs @@ -305,10 +305,12 @@ impl Topology { #[cfg(target_os = "windows")] { let nb_cores_per_socket = &cores.len() / &self.sockets.len(); - for s in self.sockets.iter_mut() { - for c in 1..nb_cores_per_socket { + warn!("nb_cores_per_socket: {} cores_len: {} sockets_len: {}", nb_cores_per_socket, &cores.len(), &self.sockets.len()); + for s in self.sockets.iter_mut().rev() { + for c in 0..nb_cores_per_socket { match cores.pop() { Some(core) => { + warn!("adding core {} to socket {}", core.id, s.id); s.add_cpu_core(core); }, None => { diff --git a/src/sensors/msr_rapl.rs b/src/sensors/msr_rapl.rs index fa92cfe3..c70c864e 100644 --- a/src/sensors/msr_rapl.rs +++ b/src/sensors/msr_rapl.rs @@ -4,6 +4,7 @@ use std::collections::HashMap; use std::error::Error; use std::mem::size_of; use sysinfo::{System, SystemExt}; +use raw_cpuid::{CpuId, TopologyType}; use windows::Win32::Foundation::{CloseHandle, GetLastError, HANDLE, INVALID_HANDLE_VALUE}; use windows::Win32::Storage::FileSystem::{ CreateFileW, FILE_FLAG_OVERLAPPED, FILE_GENERIC_READ, FILE_GENERIC_WRITE, FILE_READ_DATA, @@ -12,6 +13,8 @@ use windows::Win32::Storage::FileSystem::{ use windows::Win32::System::Ioctl::{FILE_DEVICE_UNKNOWN, METHOD_BUFFERED}; use windows::Win32::System::IO::DeviceIoControl; +use core_affinity; + // Intel RAPL MSRs const MSR_RAPL_POWER_UNIT: u32 = 0x606; // const MSR_PKG_POWER_LIMIT: u32 = 0x610; // PKG RAPL Power Limit Control (R/W) See Section 14.7.3, Package RAPL Domain. @@ -244,6 +247,20 @@ impl RecordReader for CPUSocket { // get core numbers tied to the socket if let Some(core) = self.cpu_cores.first() { core_id = core.id as u32; + match core_affinity::get_core_ids() { + Some(core_ids) => { + for c in core_ids { + if c.id == core.id as usize { + core_affinity::set_for_current(c); + warn!("Set core_affinity to {}", c.id); + break; + } + } + }, + None => { + warn!("Could'nt get core ids from core_affinity."); + } + } } else { panic!("Couldn't get a CPUCore in socket {}", self.id); } @@ -258,6 +275,7 @@ impl RecordReader for CPUSocket { warn!("src: {:b}", src); warn!("*ptr: {}", *ptr); + warn!("*ptr: {:b}", *ptr); trace!("&request: {:?} ptr (as *const u8): {:?}", &src, ptr); match send_request( @@ -311,14 +329,42 @@ impl RecordReader for CPUSocket { } impl RecordReader for Domain { fn read_record(&self) -> Result> { - Ok(Record { - timestamp: current_system_time_since_epoch(), - unit: super::units::Unit::MicroJoule, - value: String::from("10"), - }) + if let core_id = self.sensor_data.get("CORE_ID").unwrap().parse::().unwrap() { + if let msr_addr = self.sensor_data.get("MSR_ADDR").unwrap().parse::().unwrap() { + unsafe { + match get_msr_value(core_id, msr_addr, &self.sensor_data) { + Ok(rec) => { + return Ok(Record { + timestamp: current_system_time_since_epoch(), + unit: super::units::Unit::MicroJoule, + value: rec.value, + }) + }, + Err(e) => { + error!("Could'nt get MSR value for {}: {}", msr_addr, e); + Ok(Record { + timestamp: current_system_time_since_epoch(), + value: String::from("0"), + unit: super::units::Unit::MicroJoule + }) + } + } + } + } else { + panic!("Couldn't get msr_addr to target for domain {}", self.name); + } + } else { + panic!("Couldn't get core_id to target for domain {}", self.name); + } } } +//fn get_cpu_info() -> Option { +// let cpuid = CpuId::new(); +// +// +//} + impl Sensor for MsrRAPLSensor { fn generate_topology(&self) -> Result> { let mut sensor_data = HashMap::new(); @@ -331,18 +377,143 @@ impl Sensor for MsrRAPLSensor { let mut sys = System::new_all(); sys.refresh_all(); - warn!("Got {} sockets CPU", self.nb_cpu_sockets); //TODO fix that to actually count the number of sockets let mut i = 0; let logical_cpus = sys.cpus() ; + + warn!("Got {} sockets CPU from command line", self.nb_cpu_sockets); + + let mut nb_cpu_sockets = 0; + let mut logical_cpus_from_cpuid = 0; + let cpuid = CpuId::new(); + match cpuid.get_vendor_info() { + Some(info) => { + warn!("Got CPU {:?}", info); + }, + None => { + warn!("Couldn't get cpuinfo"); + } + } + for i in 0..5 { + match cpuid.get_extended_topology_info() { + Some(info) => { + warn!("Got CPU topo info {:?}", info); + for t in info { + if t.level_type() == TopologyType::Core { + logical_cpus_from_cpuid = t.processors() + } + } + }, + None => { + warn!("Couldn't get cpu topo info"); + } + } + } + warn!("Logical cpus from sysinfo: {} logical cpus from cpuid: {}", logical_cpus.len(), logical_cpus_from_cpuid); + match cpuid.get_advanced_power_mgmt_info() { + Some(info) => { + warn!("Got CPU power mgmt info {:?}", info); + }, + None => { + warn!("Couldn't get cpu power info"); + } + } + match cpuid.get_extended_feature_info() { + Some(info) => { + warn!("Got CPU feature info {:?}", info); + }, + None => { + warn!("Couldn't get cpu feature info"); + } + } + match cpuid.get_performance_monitoring_info() { + Some(info) => { + warn!("Got CPU perfmonitoring info {:?}", info); + }, + None => { + warn!("Couldn't get cpu perfmonitoring info"); + } + } + match cpuid.get_thermal_power_info() { + Some(info) => { + warn!("Got CPU thermal info {:?}", info); + }, + None => { + warn!("Couldn't get cpu thermal info"); + } + } + match cpuid.get_extended_state_info() { + Some(info) => { + warn!("Got CPU state info {:?}", info); + }, + None => { + warn!("Couldn't get cpu state info"); + } + } + match cpuid.get_processor_capacity_feature_info() { + Some(info) => { + warn!("Got CPU capacity info {:?}", info); + }, + None => { + warn!("Couldn't get cpu capacity info"); + } + } + if self.nb_cpu_sockets > 2 && logical_cpus.len() < 12 { + warn!("Scaphandre has been told to expect {} CPU sockets but there is less than 12 logical cores in total ({}).", self.nb_cpu_sockets, logical_cpus.len()); + warn!("This is unlikely, be careful to configure Scaphandre for the right number of active CPU sockets on your machine"); + } while i < self.nb_cpu_sockets { topology.safe_add_socket(i, vec![], vec![], String::from(""), 4, sensor_data.clone()); + + //topology.safe_add_domain_to_socket(i, , name, uj_counter, buffer_max_kbytes, sensor_data) i = i + 1; } topology.add_cpu_cores(); + + for s in topology.get_sockets() { + unsafe { + let core_id = s.get_cores_passive().first().unwrap().id; + match get_msr_value(core_id as usize, MSR_DRAM_ENERGY_STATUS as u64, &sensor_data) { + Ok(rec) => { + warn!("Added domain Dram !"); + let mut domain_sensor_data = sensor_data.clone(); + domain_sensor_data.insert(String::from("MSR_ADDR"), MSR_DRAM_ENERGY_STATUS.to_string()); + domain_sensor_data.insert(String::from("CORE_ID"), core_id.to_string()); + s.safe_add_domain(Domain::new(2, String::from("dram"), String::from(""), 5, domain_sensor_data)) + }, + Err(e) => { + error!("Could'nt add Dram domain."); + } + } + match get_msr_value(core_id as usize, MSR_PP0_ENERGY_STATUS as u64, &sensor_data) { + Ok(rec) => { + warn!("Added domain Core !"); + let mut domain_sensor_data = sensor_data.clone(); + domain_sensor_data.insert(String::from("MSR_ADDR"), MSR_PP0_ENERGY_STATUS.to_string()); + domain_sensor_data.insert(String::from("CORE_ID"), core_id.to_string()); + s.safe_add_domain(Domain::new(2, String::from("core"), String::from(""), 5, domain_sensor_data)) + }, + Err(e) => { + error!("Could'nt add Core domain."); + } + } + match get_msr_value(core_id as usize, MSR_PP1_ENERGY_STATUS as u64, &sensor_data) { + Ok(rec) => { + warn!("Added domain Uncore !"); + let mut domain_sensor_data = sensor_data.clone(); + domain_sensor_data.insert(String::from("MSR_ADDR"), MSR_PP1_ENERGY_STATUS.to_string()); + domain_sensor_data.insert(String::from("CORE_ID"), core_id.to_string()); + s.safe_add_domain(Domain::new(2, String::from("uncore"), String::from(""), 5, domain_sensor_data)) + }, + Err(e) => { + error!("Could'nt add Uncore domain."); + } + } + } + } Ok(topology) } @@ -355,3 +526,75 @@ impl Sensor for MsrRAPLSensor { Box::new(topology) } } + +unsafe fn get_msr_value(core_id: usize, msr_addr: u64, sensor_data: &HashMap) -> Result { + match get_handle(sensor_data.get("DRIVER_NAME").unwrap()) { + Ok(device) => { + let mut msr_result: u64 = 0; + let ptr_result = &mut msr_result as *mut u64; + let mut core_id: u32 = 0; + // get core numbers tied to the socket + match core_affinity::get_core_ids() { + Some(core_ids) => { + for c in core_ids { + if c.id == core_id as usize { + core_affinity::set_for_current(c); + warn!("Set core_affinity to {}", c.id); + break; + } + } + }, + None => { + warn!("Could'nt get core ids from core_affinity."); + } + } + //warn!("msr: {:x}", (MSR_PKG_ENERGY_STATUS as u64)); + //warn!("msr: {:b}", (MSR_PKG_ENERGY_STATUS as u64)); + //warn!("core_id: {:x} {:b}", (core_id as u64), (core_id as u64)); + //warn!("core_id: {:b}", ((core_id as u64) << 54)); + let src = ((core_id as u64) << 32) | msr_addr; + let ptr = &src as *const u64; + + //warn!("src: {:x}", src); + //warn!("src: {:b}", src); + //warn!("*ptr: {}", *ptr); + //warn!("*ptr: {:b}", *ptr); + + match send_request( + device, + MSR_PKG_ENERGY_STATUS, + ptr, + 8, + ptr_result, + size_of::(), + ) { + Ok(res) => { + close_handle(device); + + let energy_unit = sensor_data + .get("ENERGY_UNIT") + .unwrap() + .parse::() + .unwrap(); + let current_value = MsrRAPLSensor::extract_rapl_current_power(msr_result, energy_unit); + warn!("current_value: {}", current_value); + + Ok(Record { + timestamp: current_system_time_since_epoch(), + unit: super::units::Unit::MicroJoule, + value: current_value, + }) + }, + Err(e) => { + error!("Failed to get data from send_request: {:?}", e); + close_handle(device); + Err(format!("Failed to get data from send_request: {:?}", e)) + } + } + }, + Err(e) => { + error!("Couldn't get driver handle : {:?}", e); + Err(format!("Couldn't get driver handle : {:?}", e)) + } + } +} \ No newline at end of file From 197c84fb920ca8bd5177d6027b86c7a4ae037727 Mon Sep 17 00:00:00 2001 From: bpetit Date: Fri, 6 Oct 2023 17:02:20 +0200 Subject: [PATCH 21/33] chore: loop to get informations about cores and sockets --- src/sensors/msr_rapl.rs | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/sensors/msr_rapl.rs b/src/sensors/msr_rapl.rs index c70c864e..6d2872a4 100644 --- a/src/sensors/msr_rapl.rs +++ b/src/sensors/msr_rapl.rs @@ -395,18 +395,32 @@ impl Sensor for MsrRAPLSensor { warn!("Couldn't get cpuinfo"); } } - for i in 0..5 { - match cpuid.get_extended_topology_info() { - Some(info) => { - warn!("Got CPU topo info {:?}", info); - for t in info { - if t.level_type() == TopologyType::Core { - logical_cpus_from_cpuid = t.processors() + for i in 0..logical_cpus.len() { + match core_affinity::get_core_ids() { + Some(core_ids) => { + for c in core_ids { + if c.id == i as usize { + core_affinity::set_for_current(c); + warn!("Set core_affinity to {}", c.id); + match cpuid.get_extended_topology_info() { + Some(info) => { + warn!("Got CPU topo info {:?}", info); + for t in info { + if t.level_type() == TopologyType::Core { + logical_cpus_from_cpuid = t.processors() + } + } + }, + None => { + warn!("Couldn't get cpu topo info"); + } + } + break; } - } + } }, None => { - warn!("Couldn't get cpu topo info"); + warn!("Could'nt get core ids from core_affinity."); } } } From 9f8381ef5eaecdc99904709a3b0f6ef79b946693 Mon Sep 17 00:00:00 2001 From: bpetit Date: Tue, 14 Nov 2023 10:56:58 +0100 Subject: [PATCH 22/33] fix: getting core - socket mapping from apic_id --- Cargo.lock | 18 +++ Cargo.toml | 1 + src/lib.rs | 4 +- src/main.rs | 9 +- src/sensors/mod.rs | 62 +++++---- src/sensors/msr_rapl.rs | 270 +++++++++++++++++++++++----------------- 6 files changed, 218 insertions(+), 146 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a971b21e..94da456c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -118,6 +118,12 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" +[[package]] +name = "bit_field" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc827186963e592360843fb5ba4b973e145841266c1357f7180c43526f2e5b61" + [[package]] name = "bitflags" version = "1.3.2" @@ -1508,6 +1514,7 @@ dependencies = [ "warp10", "windows 0.27.0", "windows-service", + "x86", ] [[package]] @@ -2314,6 +2321,17 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" +[[package]] +name = "x86" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2781db97787217ad2a2845c396a5efe286f87467a5810836db6d74926e94a385" +dependencies = [ + "bit_field", + "bitflags", + "raw-cpuid", +] + [[package]] name = "yaml-rust" version = "0.4.5" diff --git a/Cargo.toml b/Cargo.toml index e7478467..cab7a7ec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ windows = { version = "0.27.0", features = ["alloc","Win32_Storage_FileSystem"," windows-service = { version = "0.6.0" } raw-cpuid = { version = "10.5.0" } core_affinity = { version = "0.8.1"} +x86 = { version = "0.52.0" } [features] default = ["prometheus", "riemann", "warpten", "json", "containers", "prometheuspush"] diff --git a/src/lib.rs b/src/lib.rs index 352cbdc8..60b65c46 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,9 +27,7 @@ pub fn get_default_sensor() -> impl sensors::Sensor { ); #[cfg(target_os = "windows")] - return msr_rapl::MsrRAPLSensor::new( - 1 - ); + return msr_rapl::MsrRAPLSensor::new(); } fn current_system_time_since_epoch() -> Duration { diff --git a/src/main.rs b/src/main.rs index 899f0463..187fd3fc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -74,11 +74,6 @@ struct Cli { #[cfg(target_os = "linux")] #[arg(long, default_value_t = powercap_rapl::DEFAULT_BUFFER_PER_SOCKET_MAX_KBYTES)] sensor_buffer_per_socket_max_kb: u16, - - /// Number of physical CPU packages/sockets enabled on the host - #[cfg(target_os = "windows")] - #[arg(long, default_value_t = 1)] - sensor_nb_cpu_sockets: u16, } /// Defines the possible subcommands, one per exporter. @@ -286,9 +281,7 @@ fn build_sensor(cli: &Cli) -> impl Sensor { #[cfg(target_os = "windows")] let msr_sensor_win = || { - msr_rapl::MsrRAPLSensor::new( - cli.sensor_nb_cpu_sockets - ) + msr_rapl::MsrRAPLSensor::new() }; match cli.sensor.as_deref() { diff --git a/src/sensors/mod.rs b/src/sensors/mod.rs index a1f08211..d64c33c6 100644 --- a/src/sensors/mod.rs +++ b/src/sensors/mod.rs @@ -177,6 +177,7 @@ impl Topology { let sysinfo_system = System::new_all(); let sysinfo_cores = sysinfo_system.cpus(); + warn!("Sysinfo sees {}", sysinfo_cores.len()); #[cfg(target_os = "linux")] let cpuinfo = CpuInfo::new().unwrap(); for (id, c) in (0_u16..).zip(sysinfo_cores.iter()) { @@ -206,7 +207,7 @@ impl Topology { counter_uj_path: String, buffer_max_kbytes: u16, sensor_data: HashMap, - ) { + ) -> Option { if !self.sockets.iter().any(|s| s.id == socket_id) { let socket = CPUSocket::new( socket_id, @@ -216,10 +217,23 @@ impl Topology { buffer_max_kbytes, sensor_data, ); + let res = socket.clone(); self.sockets.push(socket); + Some(res) + } else { + None } } + pub fn safe_insert_socket( + &mut self, + socket: CPUSocket + ) { + if !self.sockets.iter().any(|s| s.id == socket.id) { + self.sockets.push(socket); + } + } + /// Returns a immutable reference to self.proc_tracker pub fn get_proc_tracker(&self) -> &ProcessTracker { &self.proc_tracker @@ -302,24 +316,28 @@ impl Topology { } } } - #[cfg(target_os = "windows")] - { - let nb_cores_per_socket = &cores.len() / &self.sockets.len(); - warn!("nb_cores_per_socket: {} cores_len: {} sockets_len: {}", nb_cores_per_socket, &cores.len(), &self.sockets.len()); - for s in self.sockets.iter_mut().rev() { - for c in 0..nb_cores_per_socket { - match cores.pop() { - Some(core) => { - warn!("adding core {} to socket {}", core.id, s.id); - s.add_cpu_core(core); - }, - None => { - error!("Uneven number of CPU cores !"); - } - } - } - } - } + //#[cfg(target_os = "windows")] + //{ + //TODO: fix + //let nb_sockets = &self.sockets.len(); + //let mut socket_counter = 0; + //let nb_cores_per_socket = &cores.len() / nb_sockets; + //warn!("nb_cores_per_socket: {} cores_len: {} sockets_len: {}", nb_cores_per_socket, &cores.len(), &self.sockets.len()); + //for s in self.sockets.iter_mut() { + // for c in (socket_counter * nb_cores_per_socket)..((socket_counter+1) * nb_cores_per_socket) { + // match cores.pop() { + // Some(core) => { + // warn!("adding core {} to socket {}", core.id, s.id); + // s.add_cpu_core(core); + // }, + // None => { + // error!("Uneven number of CPU cores !"); + // } + // } + // } + // socket_counter = socket_counter + 1; + //} + //} } else { panic!("Couldn't retrieve any CPU Core from the topology. (generate_cpu_cores)"); } @@ -1553,7 +1571,7 @@ mod tests { #[cfg(target_os = "linux")] let sensor = powercap_rapl::PowercapRAPLSensor::new(8, 8, false); #[cfg(not(target_os = "linux"))] - let sensor = msr_rapl::MsrRAPLSensor::new(1); + let sensor = msr_rapl::MsrRAPLSensor::new(); let topo = (*sensor.get_topology()).unwrap(); println!("{:?}", topo.read_stats()); } @@ -1563,7 +1581,7 @@ mod tests { #[cfg(target_os = "linux")] let sensor = powercap_rapl::PowercapRAPLSensor::new(8, 8, false); #[cfg(not(target_os = "linux"))] - let sensor = msr_rapl::MsrRAPLSensor::new(1); + let sensor = msr_rapl::MsrRAPLSensor::new(); let mut topo = (*sensor.get_topology()).unwrap(); for s in topo.get_sockets() { for c in s.get_cores() { @@ -1577,7 +1595,7 @@ mod tests { #[cfg(target_os = "linux")] let sensor = powercap_rapl::PowercapRAPLSensor::new(8, 8, false); #[cfg(not(target_os = "linux"))] - let sensor = msr_rapl::MsrRAPLSensor::new(1); + let sensor = msr_rapl::MsrRAPLSensor::new(); let mut topo = (*sensor.get_topology()).unwrap(); for s in topo.get_sockets() { println!("{:?}", s.read_stats()); diff --git a/src/sensors/msr_rapl.rs b/src/sensors/msr_rapl.rs index 6d2872a4..3517adef 100644 --- a/src/sensors/msr_rapl.rs +++ b/src/sensors/msr_rapl.rs @@ -1,9 +1,9 @@ use crate::sensors::utils::current_system_time_since_epoch; -use crate::sensors::{CPUSocket, Domain, Record, RecordReader, Sensor, Topology}; +use crate::sensors::{CPUSocket, Domain, Record, RecordReader, Sensor, Topology, CPUCore}; use std::collections::HashMap; use std::error::Error; use std::mem::size_of; -use sysinfo::{System, SystemExt}; +use sysinfo::{System, SystemExt, CpuExt, Cpu}; use raw_cpuid::{CpuId, TopologyType}; use windows::Win32::Foundation::{CloseHandle, GetLastError, HANDLE, INVALID_HANDLE_VALUE}; use windows::Win32::Storage::FileSystem::{ @@ -12,22 +12,23 @@ use windows::Win32::Storage::FileSystem::{ }; use windows::Win32::System::Ioctl::{FILE_DEVICE_UNKNOWN, METHOD_BUFFERED}; use windows::Win32::System::IO::DeviceIoControl; +use windows::Win32::System::Threading::SetThreadGroupAffinity; -use core_affinity; +use core_affinity::{self, CoreId}; +use x86::cpuid; // Intel RAPL MSRs -const MSR_RAPL_POWER_UNIT: u32 = 0x606; // -const MSR_PKG_POWER_LIMIT: u32 = 0x610; // PKG RAPL Power Limit Control (R/W) See Section 14.7.3, Package RAPL Domain. -const MSR_PKG_ENERGY_STATUS: u32 = 0x611; -const MSR_PKG_POWER_INFO: u32 = 0x614; -const MSR_DRAM_ENERGY_STATUS: u32 = 0x619; -const MSR_PP0_ENERGY_STATUS: u32 = 0x639; //PP0 Energy Status (R/O) See Section 14.7.4, PP0/PP1 RAPL Domains. -const MSR_PP0_PERF_STATUS: u32 = 0x63b; // PP0 Performance Throttling Status (R/O) See Section 14.7.4, PP0/PP1 RAPL Domains. -const MSR_PP0_POLICY: u32 = 0x63a; //PP0 Balance Policy (R/W) See Section 14.7.4, PP0/PP1 RAPL Domains. -const MSR_PP0_POWER_LIMIT: u32 = 0x638; // PP0 RAPL Power Limit Control (R/W) See Section 14.7.4, PP0/PP1 RAPL Domains. -const MSR_PP1_ENERGY_STATUS: u32 = 0x641; // PP1 Energy Status (R/O) See Section 14.7.4, PP0/PP1 RAPL Domains. -const MSR_PP1_POLICY: u32 = 0x642; // PP1 Balance Policy (R/W) See Section 14.7.4, PP0/PP1 RAPL Domains. -const MSR_PP1_POWER_LIMIT: u32 = 0x640; // PP1 RAPL Power Limit Control (R/W) See Section 14.7.4, PP0/PP1 RAPL Domains. +use x86::msr::{ + MSR_RAPL_POWER_UNIT, + MSR_PKG_POWER_LIMIT, + MSR_PKG_POWER_INFO, + MSR_PKG_ENERGY_STATUS, + MSR_DRAM_ENERGY_STATUS, + MSR_DRAM_PERF_STATUS, + MSR_PP0_ENERGY_STATUS, + MSR_PP0_PERF_STATUS, + MSR_PP1_ENERGY_STATUS, +}; const MSR_PLATFORM_ENERGY_STATUS: u32 = 0x0000064d; const MSR_PLATFORM_POWER_LIMIT: u32 = 0x0000065c ; @@ -79,17 +80,16 @@ pub struct MsrRAPLSensor { power_unit: f64, energy_unit: f64, time_unit: f64, - nb_cpu_sockets: u16 } impl Default for MsrRAPLSensor { fn default() -> Self { - Self::new(1) + Self::new() } } impl MsrRAPLSensor { - pub fn new(nb_cpu_sockets: u16) -> MsrRAPLSensor { + pub fn new() -> MsrRAPLSensor { let driver_name = "\\\\.\\ScaphandreDriver"; let mut power_unit: f64 = 1.0; @@ -127,7 +127,6 @@ impl MsrRAPLSensor { energy_unit, power_unit, time_unit, - nb_cpu_sockets } } @@ -243,7 +242,7 @@ impl RecordReader for CPUSocket { Ok(device) => { let mut msr_result: u64 = 0; let ptr_result = &mut msr_result as *mut u64; - let mut core_id: u32 = 0; + let mut core_id: u32 = 2; // get core numbers tied to the socket if let Some(core) = self.cpu_cores.first() { core_id = core.id as u32; @@ -251,8 +250,11 @@ impl RecordReader for CPUSocket { Some(core_ids) => { for c in core_ids { if c.id == core.id as usize { - core_affinity::set_for_current(c); - warn!("Set core_affinity to {}", c.id); + if core_affinity::set_for_current(c) { + warn!("Set core_affinity to {}", c.id); + } else { + warn!("Failed to set core_affinity to {}", c.id); + } break; } } @@ -329,10 +331,12 @@ impl RecordReader for CPUSocket { } impl RecordReader for Domain { fn read_record(&self) -> Result> { - if let core_id = self.sensor_data.get("CORE_ID").unwrap().parse::().unwrap() { - if let msr_addr = self.sensor_data.get("MSR_ADDR").unwrap().parse::().unwrap() { + if let Some(core_id) = self.sensor_data.get("CORE_ID") { + let usize_coreid = core_id.parse::().unwrap(); + warn!("Reading Domain {} on Core {}", self.name, usize_coreid); + if let Some(msr_addr) = self.sensor_data.get("MSR_ADDR") { unsafe { - match get_msr_value(core_id, msr_addr, &self.sensor_data) { + match get_msr_value(usize_coreid, msr_addr.parse::().unwrap(), &self.sensor_data) { Ok(rec) => { return Ok(Record { timestamp: current_system_time_since_epoch(), @@ -359,12 +363,6 @@ impl RecordReader for Domain { } } -//fn get_cpu_info() -> Option { -// let cpuid = CpuId::new(); -// -// -//} - impl Sensor for MsrRAPLSensor { fn generate_topology(&self) -> Result> { let mut sensor_data = HashMap::new(); @@ -376,38 +374,70 @@ impl Sensor for MsrRAPLSensor { let mut topology = Topology::new(sensor_data.clone()); let mut sys = System::new_all(); sys.refresh_all(); - //TODO fix that to actually count the number of sockets - let mut i = 0; + let mut i: u16 = 0; let logical_cpus = sys.cpus() ; - - warn!("Got {} sockets CPU from command line", self.nb_cpu_sockets); - - let mut nb_cpu_sockets = 0; - let mut logical_cpus_from_cpuid = 0; + let mut nb_cpu_sockets: u16 = 0; let cpuid = CpuId::new(); - match cpuid.get_vendor_info() { + let mut logical_cpus_from_cpuid = 1; + match cpuid.get_extended_topology_info() { Some(info) => { - warn!("Got CPU {:?}", info); + for t in info { + if t.level_type() == TopologyType::Core { + logical_cpus_from_cpuid = t.processors(); + } + } }, None => { - warn!("Couldn't get cpuinfo"); + panic!("Could'nt get cpuid data."); } } - for i in 0..logical_cpus.len() { - match core_affinity::get_core_ids() { - Some(core_ids) => { - for c in core_ids { - if c.id == i as usize { - core_affinity::set_for_current(c); - warn!("Set core_affinity to {}", c.id); + if logical_cpus_from_cpuid <= 1 { + panic!("CpuID data is likely to be wrong."); + } + let mut no_more_sockets = false; + + match core_affinity::get_core_ids() { + Some(core_ids) => { + warn!("CPU SETUP - Cores from core_affinity, len={} : {:?}", core_ids.len(), core_ids); + warn!("CPU SETUP - Logical CPUs from sysinfo: {}", logical_cpus.len()); + while !no_more_sockets { + let start = i * logical_cpus_from_cpuid; + let stop = (i+1)*logical_cpus_from_cpuid; + warn!("Looping over {} .. {}", start, stop); + let mut current_socket = CPUSocket::new(i, vec![], vec![], String::from(""),1, sensor_data.clone()); + for c in start..stop {//core_ids { + if core_affinity::set_for_current(CoreId { id: c.into() }) { + match cpuid.get_vendor_info() { + Some(info) => { + warn!("Got CPU {:?}", info); + }, + None => { + warn!("Couldn't get cpuinfo"); + } + } + warn!("Set core_affinity to {}", c); match cpuid.get_extended_topology_info() { Some(info) => { warn!("Got CPU topo info {:?}", info); for t in info { if t.level_type() == TopologyType::Core { - logical_cpus_from_cpuid = t.processors() + //nb_cpu_sockets = logical_cpus.len() as u16 / t.processors(); + //logical_cpus_from_cpuid = t.processors() + let x2apic_id = t.x2apic_id(); + let socket_id = (x2apic_id & 240) >> 4; // upper bits of x2apic_id are socket_id, mask them, then bit shift to get socket_id + let core_id = x2apic_id & 15; // 4 last bits of x2apic_id are the core_id (per-socket) + warn!("Found socketid={} and coreid={}", socket_id, core_id); + let mut attributes = HashMap::::new(); + let ref_core = logical_cpus.first().unwrap(); + attributes.insert(String::from("frequency"), ref_core.frequency().to_string()); + attributes.insert(String::from("name"), ref_core.name().to_string()); + attributes.insert(String::from("vendor_id"), ref_core.vendor_id().to_string()); + attributes.insert(String::from("brand"), ref_core.brand().to_string()); + warn!("Adding core id {} to socket_id {}", ((i * (logical_cpus_from_cpuid - 1)) + core_id as u16), current_socket.id); + current_socket.add_cpu_core(CPUCore::new((i * (logical_cpus_from_cpuid - 1)) + core_id as u16, attributes)); + warn!("Reviewing sockets : {:?}", topology.get_sockets_passive()); } } }, @@ -415,81 +445,88 @@ impl Sensor for MsrRAPLSensor { warn!("Couldn't get cpu topo info"); } } + } else { + no_more_sockets = true; + warn!("There's likely to be no more socket to explore."); break; } } - }, - None => { - warn!("Could'nt get core ids from core_affinity."); + if !no_more_sockets { + warn!("inserting socket {:?}", current_socket); + topology.safe_insert_socket(current_socket); + i = i + 1; + } } - } - } - warn!("Logical cpus from sysinfo: {} logical cpus from cpuid: {}", logical_cpus.len(), logical_cpus_from_cpuid); - match cpuid.get_advanced_power_mgmt_info() { - Some(info) => { - warn!("Got CPU power mgmt info {:?}", info); - }, - None => { - warn!("Couldn't get cpu power info"); - } - } - match cpuid.get_extended_feature_info() { - Some(info) => { - warn!("Got CPU feature info {:?}", info); - }, - None => { - warn!("Couldn't get cpu feature info"); - } - } - match cpuid.get_performance_monitoring_info() { - Some(info) => { - warn!("Got CPU perfmonitoring info {:?}", info); - }, - None => { - warn!("Couldn't get cpu perfmonitoring info"); - } - } - match cpuid.get_thermal_power_info() { - Some(info) => { - warn!("Got CPU thermal info {:?}", info); - }, - None => { - warn!("Couldn't get cpu thermal info"); - } - } - match cpuid.get_extended_state_info() { - Some(info) => { - warn!("Got CPU state info {:?}", info); - }, - None => { - warn!("Couldn't get cpu state info"); - } - } - match cpuid.get_processor_capacity_feature_info() { - Some(info) => { - warn!("Got CPU capacity info {:?}", info); + nb_cpu_sockets = i; }, None => { - warn!("Couldn't get cpu capacity info"); + panic!("Could'nt get core ids from core_affinity."); } } - - if self.nb_cpu_sockets > 2 && logical_cpus.len() < 12 { - warn!("Scaphandre has been told to expect {} CPU sockets but there is less than 12 logical cores in total ({}).", self.nb_cpu_sockets, logical_cpus.len()); - warn!("This is unlikely, be careful to configure Scaphandre for the right number of active CPU sockets on your machine"); - } - while i < self.nb_cpu_sockets { - topology.safe_add_socket(i, vec![], vec![], String::from(""), 4, sensor_data.clone()); - - //topology.safe_add_domain_to_socket(i, , name, uj_counter, buffer_max_kbytes, sensor_data) - i = i + 1; - } - - topology.add_cpu_cores(); + //nb_cpu_sockets = logical_cpus.len() as u16 / logical_cpus_from_cpuid; + //let mut core_id_counter = logical_cpus.len(); + + //match cpuid.get_advanced_power_mgmt_info() { + // Some(info) => { + // warn!("Got CPU power mgmt info {:?}", info); + // }, + // None => { + // warn!("Couldn't get cpu power info"); + // } + //} + //match cpuid.get_extended_feature_info() { + // Some(info) => { + // warn!("Got CPU feature info {:?}", info); + // }, + // None => { + // warn!("Couldn't get cpu feature info"); + // } + //} + //match cpuid.get_performance_monitoring_info() { + // Some(info) => { + // warn!("Got CPU perfmonitoring info {:?}", info); + // }, + // None => { + // warn!("Couldn't get cpu perfmonitoring info"); + // } + //} + //match cpuid.get_thermal_power_info() { + // Some(info) => { + // warn!("Got CPU thermal info {:?}", info); + // }, + // None => { + // warn!("Couldn't get cpu thermal info"); + // } + //} + //match cpuid.get_extended_state_info() { + // Some(info) => { + // warn!("Got CPU state info {:?}", info); + // }, + // None => { + // warn!("Couldn't get cpu state info"); + // } + //} + //match cpuid.get_processor_capacity_feature_info() { + // Some(info) => { + // warn!("Got CPU capacity info {:?}", info); + // }, + // None => { + // warn!("Couldn't get cpu capacity info"); + // } + //} + //TODO: fix + //i=0; + //while i < nb_cpu_sockets { + // //topology.safe_add_domain_to_socket(i, , name, uj_counter, buffer_max_kbytes, sensor_data) + // i = i + 1; + //} + + //topology.add_cpu_cores(); for s in topology.get_sockets() { + warn!("Inspecting CPUSocket: {:?}", s); unsafe { - let core_id = s.get_cores_passive().first().unwrap().id; + let core_id = s.get_cores_passive().get(0).unwrap().id; match get_msr_value(core_id as usize, MSR_DRAM_ENERGY_STATUS as u64, &sensor_data) { Ok(rec) => { warn!("Added domain Dram !"); @@ -526,6 +563,13 @@ impl Sensor for MsrRAPLSensor { error!("Could'nt add Uncore domain."); } } + //match get_msr_value(core_id as usize, MSR_PLATFORM_ENERGY_STATUS as u64, &sensor_data) { + // Ok(rec) => { + // }, + // Err(e) => { + // error!("Could'nt find Platform/PSYS domain."); + // } + //} } } From 746fa16a503d2ea4a57d2a40dd5facd75aca3184 Mon Sep 17 00:00:00 2001 From: bpetit Date: Wed, 22 Nov 2023 10:21:58 +0100 Subject: [PATCH 23/33] style: cleaning code --- Cargo.toml | 2 +- src/sensors/mod.rs | 4 + src/sensors/msr_rapl.rs | 356 +++++++++++++++++++--------------------- 3 files changed, 177 insertions(+), 185 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index cab7a7ec..2074d408 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,7 +38,7 @@ isahc = { version = "1.7.2", optional = true } procfs = { version = "0.15.0" } [target.'cfg(target_os="windows")'.dependencies] -windows = { version = "0.27.0", features = ["alloc","Win32_Storage_FileSystem","Win32_Foundation","Win32_Security","Win32_System_IO","Win32_System_Ioctl"]} +windows = { version = "0.27.0", features = ["alloc","Win32_Storage_FileSystem","Win32_Foundation","Win32_Security","Win32_System_IO","Win32_System_Ioctl","Win32_System_Threading", "Win32_System_SystemInformation"]} windows-service = { version = "0.6.0" } raw-cpuid = { version = "10.5.0" } core_affinity = { version = "0.8.1"} diff --git a/src/sensors/mod.rs b/src/sensors/mod.rs index d64c33c6..3fa8f302 100644 --- a/src/sensors/mod.rs +++ b/src/sensors/mod.rs @@ -1048,6 +1048,10 @@ impl CPUSocket { } } + pub fn set_id(&mut self, id: u16) { + self.id = id + } + /// Adds a new Domain instance to the domains vector if and only if it doesn't exist in the vector already. fn safe_add_domain(&mut self, domain: Domain) { if !self.domains.iter().any(|d| d.id == domain.id) { diff --git a/src/sensors/msr_rapl.rs b/src/sensors/msr_rapl.rs index 3517adef..aef55c66 100644 --- a/src/sensors/msr_rapl.rs +++ b/src/sensors/msr_rapl.rs @@ -12,7 +12,11 @@ use windows::Win32::Storage::FileSystem::{ }; use windows::Win32::System::Ioctl::{FILE_DEVICE_UNKNOWN, METHOD_BUFFERED}; use windows::Win32::System::IO::DeviceIoControl; -use windows::Win32::System::Threading::SetThreadGroupAffinity; +use windows::Win32::System::Threading::{ + GetThreadGroupAffinity, GetProcessGroupAffinity, GetCurrentProcess, GetProcessInformation, + GetCurrentThread, GetActiveProcessorGroupCount, SetThreadGroupAffinity +}; +use windows::Win32::System::SystemInformation::GROUP_AFFINITY; use core_affinity::{self, CoreId}; @@ -237,94 +241,35 @@ unsafe fn send_request( impl RecordReader for CPUSocket { fn read_record(&self) -> Result> { unsafe { - let driver_name = self.sensor_data.get("DRIVER_NAME").unwrap(); - match get_handle(driver_name) { - Ok(device) => { - let mut msr_result: u64 = 0; - let ptr_result = &mut msr_result as *mut u64; - let mut core_id: u32 = 2; - // get core numbers tied to the socket - if let Some(core) = self.cpu_cores.first() { - core_id = core.id as u32; - match core_affinity::get_core_ids() { - Some(core_ids) => { - for c in core_ids { - if c.id == core.id as usize { - if core_affinity::set_for_current(c) { - warn!("Set core_affinity to {}", c.id); - } else { - warn!("Failed to set core_affinity to {}", c.id); - } - break; - } - } - }, - None => { - warn!("Could'nt get core ids from core_affinity."); - } - } - } else { - panic!("Couldn't get a CPUCore in socket {}", self.id); - } - warn!("msr: {:x}", (MSR_PKG_ENERGY_STATUS as u64)); - warn!("msr: {:b}", (MSR_PKG_ENERGY_STATUS as u64)); - warn!("core_id: {:x} {:b}", (core_id as u64), (core_id as u64)); - warn!("core_id: {:b}", ((core_id as u64) << 54)); - let src = ((core_id as u64) << 32) | (MSR_PKG_ENERGY_STATUS as u64); - let ptr = &src as *const u64; - - warn!("src: {:x}", src); - warn!("src: {:b}", src); - - warn!("*ptr: {}", *ptr); - warn!("*ptr: {:b}", *ptr); - trace!("&request: {:?} ptr (as *const u8): {:?}", &src, ptr); - - match send_request( - device, - MSR_PKG_ENERGY_STATUS, - // nouvelle version à integrer : request_code est ignoré et request doit contenir - // request_code sous forme d'un char * - ptr, - 8, - ptr_result, - size_of::(), - ) { - Ok(res) => { - debug!("{}", res); - - close_handle(device); - - let energy_unit = self - .sensor_data - .get("ENERGY_UNIT") - .unwrap() - .parse::() - .unwrap(); - - let current_power = MsrRAPLSensor::extract_rapl_current_power(msr_result, energy_unit); - warn!("current_power: {}", current_power); - - Ok(Record { - timestamp: current_system_time_since_epoch(), - unit: super::units::Unit::MicroJoule, - value: current_power, - }) + let current_thread = GetCurrentThread(); + let processorgroup_id = self.sensor_data.get("PROCESSORGROUP_ID").unwrap().parse::().unwrap(); + let mut thread_group_affinity: GROUP_AFFINITY = GROUP_AFFINITY { Mask: 255, Group: processorgroup_id, Reserved: [0,0,0] }; + let thread_affinity = GetThreadGroupAffinity(current_thread, &mut thread_group_affinity); + if thread_affinity.as_bool() { + warn!("got thead_affinity : {:?}", thread_group_affinity); + let core_id = self.cpu_cores.last().unwrap().id; //(self.cpu_cores.last().unwrap().id + self.id * self.cpu_cores.len() as u16) as usize + let newaffinity = GROUP_AFFINITY { Mask: (self.cpu_cores.len() + self.id as usize * self.cpu_cores.len() - 1) as usize, Group: processorgroup_id, Reserved: [0, 0, 0]}; + let res = SetThreadGroupAffinity(current_thread, &newaffinity, &mut thread_group_affinity); + if res.as_bool() { + warn!("Asking get_msr_value, from socket, with core_id={}", core_id); + match get_msr_value(core_id as usize, MSR_PKG_ENERGY_STATUS as u64, &self.sensor_data) { + Ok(rec) => { + return Ok(Record { timestamp: current_system_time_since_epoch(), value: rec.value, unit: super::units::Unit::MicroJoule }) }, Err(e) => { - error!("Failed to get data from send_request: {:?}", e); - close_handle(device); - Ok(Record { + error!("Could'nt get MSR value for {}: {}", MSR_PKG_ENERGY_STATUS, e); + return Ok(Record { timestamp: current_system_time_since_epoch(), - unit: super::units::Unit::MicroJoule, value: String::from("0"), + unit: super::units::Unit::MicroJoule }) } } - }, - Err(e) => { - panic!("Couldn't get driver handle : {:?}", e); + } else { + panic!("Couldn't set Thread affinity !"); } + } else { + panic!("Coudld'nt get Thread affinity !"); } } } @@ -336,6 +281,7 @@ impl RecordReader for Domain { warn!("Reading Domain {} on Core {}", self.name, usize_coreid); if let Some(msr_addr) = self.sensor_data.get("MSR_ADDR") { unsafe { + warn!("Asking, from Domain, get_msr_value with core_id={}", usize_coreid); match get_msr_value(usize_coreid, msr_addr.parse::().unwrap(), &self.sensor_data) { Ok(rec) => { return Ok(Record { @@ -374,94 +320,132 @@ impl Sensor for MsrRAPLSensor { let mut topology = Topology::new(sensor_data.clone()); let mut sys = System::new_all(); sys.refresh_all(); - - //TODO fix that to actually count the number of sockets - let mut i: u16 = 0; - let logical_cpus = sys.cpus() ; - let mut nb_cpu_sockets: u16 = 0; - let cpuid = CpuId::new(); - let mut logical_cpus_from_cpuid = 1; - match cpuid.get_extended_topology_info() { - Some(info) => { - for t in info { - if t.level_type() == TopologyType::Core { - logical_cpus_from_cpuid = t.processors(); + + unsafe { + let current_thread = GetCurrentThread(); + + let group_count = GetActiveProcessorGroupCount(); + warn!("GROUP COUNT : {}", group_count); + + for group_id in 0..group_count { + //TODO fix that to actually count the number of sockets + let logical_cpus = sys.cpus() ; + let mut nb_cpu_sockets: u16 = 0; + let cpuid = CpuId::new(); + let mut logical_cpus_from_cpuid = 1; + match cpuid.get_extended_topology_info() { + Some(info) => { + for t in info { + if t.level_type() == TopologyType::Core { + logical_cpus_from_cpuid = t.processors(); + } + } + }, + None => { + panic!("Could'nt get cpuid data."); } } - }, - None => { - panic!("Could'nt get cpuid data."); - } - } - if logical_cpus_from_cpuid <= 1 { - panic!("CpuID data is likely to be wrong."); - } - let mut no_more_sockets = false; - - match core_affinity::get_core_ids() { - Some(core_ids) => { - warn!("CPU SETUP - Cores from core_affinity, len={} : {:?}", core_ids.len(), core_ids); - warn!("CPU SETUP - Logical CPUs from sysinfo: {}", logical_cpus.len()); - while !no_more_sockets { - let start = i * logical_cpus_from_cpuid; - let stop = (i+1)*logical_cpus_from_cpuid; - warn!("Looping over {} .. {}", start, stop); - let mut current_socket = CPUSocket::new(i, vec![], vec![], String::from(""),1, sensor_data.clone()); - for c in start..stop {//core_ids { - if core_affinity::set_for_current(CoreId { id: c.into() }) { - match cpuid.get_vendor_info() { - Some(info) => { - warn!("Got CPU {:?}", info); - }, - None => { - warn!("Couldn't get cpuinfo"); - } - } - warn!("Set core_affinity to {}", c); - match cpuid.get_extended_topology_info() { - Some(info) => { - warn!("Got CPU topo info {:?}", info); - for t in info { - if t.level_type() == TopologyType::Core { - //nb_cpu_sockets = logical_cpus.len() as u16 / t.processors(); - //logical_cpus_from_cpuid = t.processors() - let x2apic_id = t.x2apic_id(); - let socket_id = (x2apic_id & 240) >> 4; // upper bits of x2apic_id are socket_id, mask them, then bit shift to get socket_id - let core_id = x2apic_id & 15; // 4 last bits of x2apic_id are the core_id (per-socket) - warn!("Found socketid={} and coreid={}", socket_id, core_id); - let mut attributes = HashMap::::new(); - let ref_core = logical_cpus.first().unwrap(); - attributes.insert(String::from("frequency"), ref_core.frequency().to_string()); - attributes.insert(String::from("name"), ref_core.name().to_string()); - attributes.insert(String::from("vendor_id"), ref_core.vendor_id().to_string()); - attributes.insert(String::from("brand"), ref_core.brand().to_string()); - warn!("Adding core id {} to socket_id {}", ((i * (logical_cpus_from_cpuid - 1)) + core_id as u16), current_socket.id); - current_socket.add_cpu_core(CPUCore::new((i * (logical_cpus_from_cpuid - 1)) + core_id as u16, attributes)); - warn!("Reviewing sockets : {:?}", topology.get_sockets_passive()); + if logical_cpus_from_cpuid <= 1 { + panic!("CpuID data is likely to be wrong."); + } + let mut i: u16 = 0; + let mut no_more_sockets = false; + warn!("Entering ProcessorGroup {}", group_id); + let newaffinity = GROUP_AFFINITY { Mask: 255, Group: group_id, Reserved: [0, 0, 0]}; + let mut thread_group_affinity: GROUP_AFFINITY = GROUP_AFFINITY { Mask: 255, Group: 0, Reserved: [0,0,0] }; + let thread_affinity = GetThreadGroupAffinity(current_thread, &mut thread_group_affinity); + warn!("Thread group affinity result : {:?}", thread_affinity); + if thread_affinity.as_bool() { + warn!("got thead_affinity : {:?}", thread_group_affinity); + let res = SetThreadGroupAffinity(current_thread, &newaffinity, &mut thread_group_affinity); + if res.as_bool() { + warn!("Have set thread affinity: {:?}", newaffinity); + match core_affinity::get_core_ids() { + Some(core_ids) => { + warn!("CPU SETUP - Cores from core_affinity, len={} : {:?}", core_ids.len(), core_ids); + warn!("CPU SETUP - Logical CPUs from sysinfo: {}", logical_cpus.len()); + while !no_more_sockets { + let start = i * logical_cpus_from_cpuid; + let stop = (i+1)*logical_cpus_from_cpuid; + warn!("Looping over {} .. {}", start, stop); + sensor_data.insert(String::from("PROCESSORGROUP_ID"), group_id.to_string()); + let mut current_socket = CPUSocket::new(i, vec![], vec![], String::from(""),1, sensor_data.clone()); + for c in start..stop {//core_ids { + if core_affinity::set_for_current(CoreId { id: c.into() }) { + match cpuid.get_vendor_info() { + Some(info) => { + warn!("Got CPU {:?}", info); + }, + None => { + warn!("Couldn't get cpuinfo"); + } + } + warn!("Set core_affinity to {}", c); + match cpuid.get_extended_topology_info() { + Some(info) => { + warn!("Got CPU topo info {:?}", info); + for t in info { + if t.level_type() == TopologyType::Core { + //nb_cpu_sockets = logical_cpus.len() as u16 / t.processors(); + //logical_cpus_from_cpuid = t.processors() + let x2apic_id = t.x2apic_id(); + let socket_id = (x2apic_id & 240) >> 4; // upper bits of x2apic_id are socket_id, mask them, then bit shift to get socket_id + current_socket.set_id(socket_id as u16); + let core_id = x2apic_id & 15; // 4 last bits of x2apic_id are the core_id (per-socket) + warn!("Found socketid={} and coreid={}", socket_id, core_id); + let mut attributes = HashMap::::new(); + let ref_core = logical_cpus.first().unwrap(); + attributes.insert(String::from("frequency"), ref_core.frequency().to_string()); + attributes.insert(String::from("name"), ref_core.name().to_string()); + attributes.insert(String::from("vendor_id"), ref_core.vendor_id().to_string()); + attributes.insert(String::from("brand"), ref_core.brand().to_string()); + warn!("Adding core id {} to socket_id {}", ((i * (logical_cpus_from_cpuid - 1)) + core_id as u16), current_socket.id); + current_socket.add_cpu_core(CPUCore::new((i * (logical_cpus_from_cpuid - 1)) + core_id as u16, attributes)); + warn!("Reviewing sockets : {:?}", topology.get_sockets_passive()); + } + } + }, + None => { + warn!("Couldn't get cpu topo info"); + } + } + } else { + no_more_sockets = true; + warn!("There's likely to be no more socket to explore."); + break; } + } + if !no_more_sockets { + warn!("inserting socket {:?}", current_socket); + topology.safe_insert_socket(current_socket); + i = i + 1; } - }, - None => { - warn!("Couldn't get cpu topo info"); + } + nb_cpu_sockets = i; + }, + None => { + panic!("Could'nt get core ids from core_affinity."); + } + } + if let Some(info) = CpuId::new().get_extended_topology_info() { + for c in info { + if c.level_type() == TopologyType::Core { + warn!("CPUID : {:?}", c); } } - } else { - no_more_sockets = true; - warn!("There's likely to be no more socket to explore."); - break; } - } - if !no_more_sockets { - warn!("inserting socket {:?}", current_socket); - topology.safe_insert_socket(current_socket); - i = i + 1; + } else { + error!("Could'nt set thread affinity !"); + let last_error = GetLastError(); + panic!("Error was : {:?}", last_error); } + } else { + warn!("Getting thread group affinity failed !"); + let last_error = GetLastError(); + panic!("Error was: {:?}", last_error); // win32 error 122 is insufficient buffer } - nb_cpu_sockets = i; - }, - None => { - panic!("Could'nt get core ids from core_affinity."); } + //let process_information = GetProcessInformation(current_process, , , ); } //nb_cpu_sockets = logical_cpus.len() as u16 / logical_cpus_from_cpuid; //let mut core_id_counter = logical_cpus.len(); @@ -526,13 +510,14 @@ impl Sensor for MsrRAPLSensor { for s in topology.get_sockets() { warn!("Inspecting CPUSocket: {:?}", s); unsafe { - let core_id = s.get_cores_passive().get(0).unwrap().id; + let core_id = s.get_cores_passive().last().unwrap().id + s.id * s.cpu_cores.len() as u16; + warn!("Asking get_msr_value, from generate_tpopo, with core_id={}", core_id); match get_msr_value(core_id as usize, MSR_DRAM_ENERGY_STATUS as u64, &sensor_data) { Ok(rec) => { warn!("Added domain Dram !"); let mut domain_sensor_data = sensor_data.clone(); domain_sensor_data.insert(String::from("MSR_ADDR"), MSR_DRAM_ENERGY_STATUS.to_string()); - domain_sensor_data.insert(String::from("CORE_ID"), core_id.to_string()); + domain_sensor_data.insert(String::from("CORE_ID"), core_id.to_string()); // nb of cores in a socket * socket_id + local_core_id s.safe_add_domain(Domain::new(2, String::from("dram"), String::from(""), 5, domain_sensor_data)) }, Err(e) => { @@ -586,35 +571,38 @@ impl Sensor for MsrRAPLSensor { } unsafe fn get_msr_value(core_id: usize, msr_addr: u64, sensor_data: &HashMap) -> Result { + let current_process = GetCurrentProcess(); + let current_thread = GetCurrentThread(); + let mut thread_group_affinity = GROUP_AFFINITY { Mask: 255, Group: 9, Reserved: [0,0,0] }; + let thread_affinity_res = GetThreadGroupAffinity(current_thread, &mut thread_group_affinity); + if thread_affinity_res.as_bool() { + warn!("Thread affinity found : {:?}", thread_group_affinity); + } else { + error!("Could'nt get thread group affinity"); + } + let mut process_group_array: [u16; 8] = [0,0,0,0,0,0,0,0]; + let mut process_group_array_len = 8; + let process_affinity_res = GetProcessGroupAffinity(current_process, &mut process_group_array_len, process_group_array.as_mut_ptr()); + if process_affinity_res.as_bool() { + warn!("Process affinity found: {:?}", process_group_array); + } else { + error!("Could'nt get process group affinity"); + error!("Error was : {:?}", GetLastError()); + } + warn!("Core ID requested to the driver : {}", core_id); match get_handle(sensor_data.get("DRIVER_NAME").unwrap()) { Ok(device) => { let mut msr_result: u64 = 0; let ptr_result = &mut msr_result as *mut u64; - let mut core_id: u32 = 0; - // get core numbers tied to the socket - match core_affinity::get_core_ids() { - Some(core_ids) => { - for c in core_ids { - if c.id == core_id as usize { - core_affinity::set_for_current(c); - warn!("Set core_affinity to {}", c.id); - break; - } - } - }, - None => { - warn!("Could'nt get core ids from core_affinity."); - } - } - //warn!("msr: {:x}", (MSR_PKG_ENERGY_STATUS as u64)); - //warn!("msr: {:b}", (MSR_PKG_ENERGY_STATUS as u64)); - //warn!("core_id: {:x} {:b}", (core_id as u64), (core_id as u64)); - //warn!("core_id: {:b}", ((core_id as u64) << 54)); - let src = ((core_id as u64) << 32) | msr_addr; + warn!("msr_addr: {:b}", msr_addr); + warn!("core_id: {:x} {:b}", (core_id as u64), (core_id as u64)); + warn!("core_id: {:b}", ((core_id as u64) << 32)); + let src = ((core_id as u64) << 32) | msr_addr; //let src = ((core_id as u64) << 32) | msr_addr; let ptr = &src as *const u64; - //warn!("src: {:x}", src); - //warn!("src: {:b}", src); + warn!("src: {:x}", src); + warn!("src: {:b}", src); + warn!("*ptr: {:b}", *ptr); //warn!("*ptr: {}", *ptr); //warn!("*ptr: {:b}", *ptr); From 7f5e721fafc11d196409ba8b7b09aa75f47b89e1 Mon Sep 17 00:00:00 2001 From: bpetit Date: Wed, 6 Dec 2023 17:17:18 +0100 Subject: [PATCH 24/33] chore: improved domains support and lowered verbosity of debug messages --- src/exporters/stdout.rs | 2 +- src/sensors/mod.rs | 4 ++ src/sensors/msr_rapl.rs | 90 +++++++++++++++++++++-------------------- 3 files changed, 52 insertions(+), 44 deletions(-) diff --git a/src/exporters/stdout.rs b/src/exporters/stdout.rs index 9044a46c..6ce78e28 100644 --- a/src/exporters/stdout.rs +++ b/src/exporters/stdout.rs @@ -134,7 +134,7 @@ impl StdoutExporter { .iter() .filter(|x| x.name == "scaph_socket_power_microwatts") { - warn!("✅ Found socket power metric !"); + debug!("✅ Found socket power metric !"); let power = format!("{}", s.metric_value).parse::().unwrap() / 1000000.0; let mut power_str = String::from("----"); if power > 0.0 { diff --git a/src/sensors/mod.rs b/src/sensors/mod.rs index 3fa8f302..0e2a0e3e 100644 --- a/src/sensors/mod.rs +++ b/src/sensors/mod.rs @@ -262,6 +262,10 @@ impl Topology { self.domains_names = Some(domain_names); } + pub fn set_domains_names(&mut self, names: Vec) { + self.domains_names = Some(names); + } + /// Adds a Domain instance to a given socket, if and only if the domain /// id doesn't exist already for the socket. pub fn safe_add_domain_to_socket( diff --git a/src/sensors/msr_rapl.rs b/src/sensors/msr_rapl.rs index aef55c66..131a7914 100644 --- a/src/sensors/msr_rapl.rs +++ b/src/sensors/msr_rapl.rs @@ -177,11 +177,11 @@ impl MsrRAPLSensor { impl RecordReader for Topology { fn read_record(&self) -> Result> { let mut res: u64 = 0; - warn!("Topology: I have {} sockets", self.sockets.len()); + debug!("Topology: I have {} sockets", self.sockets.len()); for s in &self.sockets { match s.read_record() { Ok(rec) => { - warn!("rec: {:?}", rec); + debug!("rec: {:?}", rec); res = res + rec.value.parse::()?; }, Err(e) => { @@ -246,12 +246,12 @@ impl RecordReader for CPUSocket { let mut thread_group_affinity: GROUP_AFFINITY = GROUP_AFFINITY { Mask: 255, Group: processorgroup_id, Reserved: [0,0,0] }; let thread_affinity = GetThreadGroupAffinity(current_thread, &mut thread_group_affinity); if thread_affinity.as_bool() { - warn!("got thead_affinity : {:?}", thread_group_affinity); + debug!("got thead_affinity : {:?}", thread_group_affinity); let core_id = self.cpu_cores.last().unwrap().id; //(self.cpu_cores.last().unwrap().id + self.id * self.cpu_cores.len() as u16) as usize let newaffinity = GROUP_AFFINITY { Mask: (self.cpu_cores.len() + self.id as usize * self.cpu_cores.len() - 1) as usize, Group: processorgroup_id, Reserved: [0, 0, 0]}; let res = SetThreadGroupAffinity(current_thread, &newaffinity, &mut thread_group_affinity); if res.as_bool() { - warn!("Asking get_msr_value, from socket, with core_id={}", core_id); + debug!("Asking get_msr_value, from socket, with core_id={}", core_id); match get_msr_value(core_id as usize, MSR_PKG_ENERGY_STATUS as u64, &self.sensor_data) { Ok(rec) => { return Ok(Record { timestamp: current_system_time_since_epoch(), value: rec.value, unit: super::units::Unit::MicroJoule }) @@ -278,10 +278,10 @@ impl RecordReader for Domain { fn read_record(&self) -> Result> { if let Some(core_id) = self.sensor_data.get("CORE_ID") { let usize_coreid = core_id.parse::().unwrap(); - warn!("Reading Domain {} on Core {}", self.name, usize_coreid); + debug!("Reading Domain {} on Core {}", self.name, usize_coreid); if let Some(msr_addr) = self.sensor_data.get("MSR_ADDR") { unsafe { - warn!("Asking, from Domain, get_msr_value with core_id={}", usize_coreid); + debug!("Asking, from Domain, get_msr_value with core_id={}", usize_coreid); match get_msr_value(usize_coreid, msr_addr.parse::().unwrap(), &self.sensor_data) { Ok(rec) => { return Ok(Record { @@ -325,7 +325,7 @@ impl Sensor for MsrRAPLSensor { let current_thread = GetCurrentThread(); let group_count = GetActiveProcessorGroupCount(); - warn!("GROUP COUNT : {}", group_count); + debug!("GROUP COUNT : {}", group_count); for group_id in 0..group_count { //TODO fix that to actually count the number of sockets @@ -350,40 +350,40 @@ impl Sensor for MsrRAPLSensor { } let mut i: u16 = 0; let mut no_more_sockets = false; - warn!("Entering ProcessorGroup {}", group_id); + debug!("Entering ProcessorGroup {}", group_id); let newaffinity = GROUP_AFFINITY { Mask: 255, Group: group_id, Reserved: [0, 0, 0]}; let mut thread_group_affinity: GROUP_AFFINITY = GROUP_AFFINITY { Mask: 255, Group: 0, Reserved: [0,0,0] }; let thread_affinity = GetThreadGroupAffinity(current_thread, &mut thread_group_affinity); - warn!("Thread group affinity result : {:?}", thread_affinity); + debug!("Thread group affinity result : {:?}", thread_affinity); if thread_affinity.as_bool() { - warn!("got thead_affinity : {:?}", thread_group_affinity); + debug!("got thead_affinity : {:?}", thread_group_affinity); let res = SetThreadGroupAffinity(current_thread, &newaffinity, &mut thread_group_affinity); if res.as_bool() { - warn!("Have set thread affinity: {:?}", newaffinity); + debug!("Have set thread affinity: {:?}", newaffinity); match core_affinity::get_core_ids() { Some(core_ids) => { - warn!("CPU SETUP - Cores from core_affinity, len={} : {:?}", core_ids.len(), core_ids); - warn!("CPU SETUP - Logical CPUs from sysinfo: {}", logical_cpus.len()); + debug!("CPU SETUP - Cores from core_affinity, len={} : {:?}", core_ids.len(), core_ids); + debug!("CPU SETUP - Logical CPUs from sysinfo: {}", logical_cpus.len()); while !no_more_sockets { let start = i * logical_cpus_from_cpuid; let stop = (i+1)*logical_cpus_from_cpuid; - warn!("Looping over {} .. {}", start, stop); + debug!("Looping over {} .. {}", start, stop); sensor_data.insert(String::from("PROCESSORGROUP_ID"), group_id.to_string()); let mut current_socket = CPUSocket::new(i, vec![], vec![], String::from(""),1, sensor_data.clone()); for c in start..stop {//core_ids { if core_affinity::set_for_current(CoreId { id: c.into() }) { match cpuid.get_vendor_info() { Some(info) => { - warn!("Got CPU {:?}", info); + debug!("Got CPU {:?}", info); }, None => { warn!("Couldn't get cpuinfo"); } } - warn!("Set core_affinity to {}", c); + debug!("Set core_affinity to {}", c); match cpuid.get_extended_topology_info() { Some(info) => { - warn!("Got CPU topo info {:?}", info); + debug!("Got CPU topo info {:?}", info); for t in info { if t.level_type() == TopologyType::Core { //nb_cpu_sockets = logical_cpus.len() as u16 / t.processors(); @@ -392,16 +392,16 @@ impl Sensor for MsrRAPLSensor { let socket_id = (x2apic_id & 240) >> 4; // upper bits of x2apic_id are socket_id, mask them, then bit shift to get socket_id current_socket.set_id(socket_id as u16); let core_id = x2apic_id & 15; // 4 last bits of x2apic_id are the core_id (per-socket) - warn!("Found socketid={} and coreid={}", socket_id, core_id); + debug!("Found socketid={} and coreid={}", socket_id, core_id); let mut attributes = HashMap::::new(); let ref_core = logical_cpus.first().unwrap(); attributes.insert(String::from("frequency"), ref_core.frequency().to_string()); attributes.insert(String::from("name"), ref_core.name().to_string()); attributes.insert(String::from("vendor_id"), ref_core.vendor_id().to_string()); attributes.insert(String::from("brand"), ref_core.brand().to_string()); - warn!("Adding core id {} to socket_id {}", ((i * (logical_cpus_from_cpuid - 1)) + core_id as u16), current_socket.id); + debug!("Adding core id {} to socket_id {}", ((i * (logical_cpus_from_cpuid - 1)) + core_id as u16), current_socket.id); current_socket.add_cpu_core(CPUCore::new((i * (logical_cpus_from_cpuid - 1)) + core_id as u16, attributes)); - warn!("Reviewing sockets : {:?}", topology.get_sockets_passive()); + debug!("Reviewing sockets : {:?}", topology.get_sockets_passive()); } } }, @@ -411,12 +411,12 @@ impl Sensor for MsrRAPLSensor { } } else { no_more_sockets = true; - warn!("There's likely to be no more socket to explore."); + debug!("There's likely to be no more socket to explore."); break; } } if !no_more_sockets { - warn!("inserting socket {:?}", current_socket); + debug!("inserting socket {:?}", current_socket); topology.safe_insert_socket(current_socket); i = i + 1; } @@ -430,7 +430,7 @@ impl Sensor for MsrRAPLSensor { if let Some(info) = CpuId::new().get_extended_topology_info() { for c in info { if c.level_type() == TopologyType::Core { - warn!("CPUID : {:?}", c); + debug!("CPUID : {:?}", c); } } } @@ -440,7 +440,7 @@ impl Sensor for MsrRAPLSensor { panic!("Error was : {:?}", last_error); } } else { - warn!("Getting thread group affinity failed !"); + panic!("Getting thread group affinity failed !"); let last_error = GetLastError(); panic!("Error was: {:?}", last_error); // win32 error 122 is insufficient buffer } @@ -506,46 +506,49 @@ impl Sensor for MsrRAPLSensor { //} //topology.add_cpu_cores(); - + let mut domains = vec![]; for s in topology.get_sockets() { - warn!("Inspecting CPUSocket: {:?}", s); + debug!("Inspecting CPUSocket: {:?}", s); unsafe { let core_id = s.get_cores_passive().last().unwrap().id + s.id * s.cpu_cores.len() as u16; - warn!("Asking get_msr_value, from generate_tpopo, with core_id={}", core_id); + debug!("Asking get_msr_value, from generate_tpopo, with core_id={}", core_id); match get_msr_value(core_id as usize, MSR_DRAM_ENERGY_STATUS as u64, &sensor_data) { Ok(rec) => { - warn!("Added domain Dram !"); + debug!("Added domain Dram !"); let mut domain_sensor_data = sensor_data.clone(); domain_sensor_data.insert(String::from("MSR_ADDR"), MSR_DRAM_ENERGY_STATUS.to_string()); domain_sensor_data.insert(String::from("CORE_ID"), core_id.to_string()); // nb of cores in a socket * socket_id + local_core_id + domains.push(String::from("dram")); s.safe_add_domain(Domain::new(2, String::from("dram"), String::from(""), 5, domain_sensor_data)) }, Err(e) => { - error!("Could'nt add Dram domain."); + warn!("Could'nt add Dram domain."); } } match get_msr_value(core_id as usize, MSR_PP0_ENERGY_STATUS as u64, &sensor_data) { Ok(rec) => { - warn!("Added domain Core !"); + debug!("Added domain Core !"); let mut domain_sensor_data = sensor_data.clone(); domain_sensor_data.insert(String::from("MSR_ADDR"), MSR_PP0_ENERGY_STATUS.to_string()); domain_sensor_data.insert(String::from("CORE_ID"), core_id.to_string()); + domains.push(String::from("core")); s.safe_add_domain(Domain::new(2, String::from("core"), String::from(""), 5, domain_sensor_data)) }, Err(e) => { - error!("Could'nt add Core domain."); + warn!("Could'nt add Core domain."); } } match get_msr_value(core_id as usize, MSR_PP1_ENERGY_STATUS as u64, &sensor_data) { Ok(rec) => { - warn!("Added domain Uncore !"); + debug!("Added domain Uncore !"); let mut domain_sensor_data = sensor_data.clone(); domain_sensor_data.insert(String::from("MSR_ADDR"), MSR_PP1_ENERGY_STATUS.to_string()); domain_sensor_data.insert(String::from("CORE_ID"), core_id.to_string()); + domains.push(String::from("uncore")); s.safe_add_domain(Domain::new(2, String::from("uncore"), String::from(""), 5, domain_sensor_data)) }, Err(e) => { - error!("Could'nt add Uncore domain."); + warn!("Could'nt add Uncore domain."); } } //match get_msr_value(core_id as usize, MSR_PLATFORM_ENERGY_STATUS as u64, &sensor_data) { @@ -558,6 +561,7 @@ impl Sensor for MsrRAPLSensor { } } + topology.set_domains_names(domains); Ok(topology) } @@ -576,7 +580,7 @@ unsafe fn get_msr_value(core_id: usize, msr_addr: u64, sensor_data: &HashMap { let mut msr_result: u64 = 0; let ptr_result = &mut msr_result as *mut u64; - warn!("msr_addr: {:b}", msr_addr); - warn!("core_id: {:x} {:b}", (core_id as u64), (core_id as u64)); - warn!("core_id: {:b}", ((core_id as u64) << 32)); + debug!("msr_addr: {:b}", msr_addr); + debug!("core_id: {:x} {:b}", (core_id as u64), (core_id as u64)); + debug!("core_id: {:b}", ((core_id as u64) << 32)); let src = ((core_id as u64) << 32) | msr_addr; //let src = ((core_id as u64) << 32) | msr_addr; let ptr = &src as *const u64; - warn!("src: {:x}", src); - warn!("src: {:b}", src); - warn!("*ptr: {:b}", *ptr); + debug!("src: {:x}", src); + debug!("src: {:b}", src); + debug!("*ptr: {:b}", *ptr); //warn!("*ptr: {}", *ptr); //warn!("*ptr: {:b}", *ptr); @@ -623,7 +627,7 @@ unsafe fn get_msr_value(core_id: usize, msr_addr: u64, sensor_data: &HashMap() .unwrap(); let current_value = MsrRAPLSensor::extract_rapl_current_power(msr_result, energy_unit); - warn!("current_value: {}", current_value); + debug!("current_value: {}", current_value); Ok(Record { timestamp: current_system_time_since_epoch(), From 1103155edd34a7dc9d47e123ce50d392ebd5f6cf Mon Sep 17 00:00:00 2001 From: bpetit Date: Thu, 4 Jan 2024 19:56:47 +0100 Subject: [PATCH 25/33] feat: enabled psys for windows --- src/exporters/mod.rs | 18 +-------- src/exporters/stdout.rs | 15 ++++++-- src/sensors/mod.rs | 83 ++++++++++++++++++++++++---------------- src/sensors/msr_rapl.rs | 84 +++++++++++++++++++++++++---------------- 4 files changed, 115 insertions(+), 85 deletions(-) diff --git a/src/exporters/mod.rs b/src/exporters/mod.rs index d458c475..c5e8f427 100644 --- a/src/exporters/mod.rs +++ b/src/exporters/mod.rs @@ -18,7 +18,7 @@ pub mod utils; pub mod warpten; use crate::sensors::{ utils::{current_system_time_since_epoch, IProcess}, - RecordGenerator, Topology, + RecordGenerator, Topology, Record }; use chrono::Utc; use std::collections::HashMap; @@ -644,22 +644,6 @@ impl MetricGenerator { metric_value: MetricValueType::Text(metric_value.value), }); - if let Some(psys) = self.topology.get_rapl_psys_energy_microjoules() { - self.data.push(Metric { - name: String::from("scaph_host_rapl_psys_microjoules"), - metric_type: String::from("counter"), - ttl: 60.0, - timestamp: psys.timestamp, - hostname: self.hostname.clone(), - state: String::from("ok"), - tags: vec!["scaphandre".to_string()], - attributes: HashMap::new(), - description: String::from( - "Raw extract of RAPL PSYS domain energy value, in microjoules", - ), - metric_value: MetricValueType::Text(psys.value), - }) - } } /// Generate socket metrics. diff --git a/src/exporters/stdout.rs b/src/exporters/stdout.rs index 6ce78e28..ad5fe321 100644 --- a/src/exporters/stdout.rs +++ b/src/exporters/stdout.rs @@ -111,8 +111,14 @@ impl StdoutExporter { fn summarized_view(&mut self, metrics: Vec) { let mut metrics_iter = metrics.iter(); let none_value = MetricValueType::Text("0".to_string()); + let mut host_power_source = String::from(""); let host_power = match metrics_iter.find(|x| x.name == "scaph_host_power_microwatts") { - Some(m) => &m.metric_value, + Some(m) => { + if let Some(src) = &m.attributes.get("value_source") { + host_power_source = src.to_string() + } + &m.metric_value + }, None => &none_value, }; @@ -122,8 +128,9 @@ impl StdoutExporter { } println!( - "Host:\t{} W", - (format!("{host_power}").parse::().unwrap() / 1000000.0) + "Host:\t{} W from {}", + (format!("{host_power}").parse::().unwrap() / 1000000.0), + host_power_source ); if domain_names.is_some() { @@ -144,6 +151,8 @@ impl StdoutExporter { let mut to_print = format!("Socket{socket_id}\t{power_str} W |\t"); + + let domains = metrics.iter().filter(|x| { x.name == "scaph_domain_power_microwatts" && x.attributes.get("socket_id").unwrap() == &socket_id diff --git a/src/sensors/mod.rs b/src/sensors/mod.rs index 0e2a0e3e..fd9f5518 100644 --- a/src/sensors/mod.rs +++ b/src/sensors/mod.rs @@ -3,8 +3,10 @@ //! `Sensor` is the root for all sensors. It defines the [Sensor] trait //! needed to implement a sensor. -#[cfg(not(target_os = "linux"))] +#[cfg(target_os = "windows")] pub mod msr_rapl; +#[cfg(target_os="windows")] +use msr_rapl::get_msr_value; #[cfg(target_os = "linux")] pub mod powercap_rapl; pub mod units; @@ -459,29 +461,35 @@ impl Topology { .record_buffer .get(self.record_buffer.len() - 2) .unwrap(); + match previous_record.value.trim().parse::() { + Ok(previous_microjoules) => { + match last_record.value.trim().parse::() { + Ok(last_microjoules) => { + if previous_microjoules > last_microjoules { + return None; + } + let microjoules = last_microjoules - previous_microjoules; + let time_diff = last_record.timestamp.as_secs_f64() + - previous_record.timestamp.as_secs_f64(); + let microwatts = microjoules as f64 / time_diff; + return Some(Record::new( + last_record.timestamp, + (microwatts as u64).to_string(), + units::Unit::MicroWatt, + )); + }, + Err(e) => { + warn!( + "Could'nt get previous_microjoules - value : '{}' - error : {:?}", + previous_record.value, e + ); + } - if let Ok(last_microjoules) = last_record.value.trim().parse::() { - if let Ok(previous_microjoules) = previous_record.value.trim().parse::() { - if previous_microjoules > last_microjoules { - return None; } - let microjoules = last_microjoules - previous_microjoules; - let time_diff = last_record.timestamp.as_secs_f64() - - previous_record.timestamp.as_secs_f64(); - let microwatts = microjoules as f64 / time_diff; - return Some(Record::new( - last_record.timestamp, - (microwatts as u64).to_string(), - units::Unit::MicroWatt, - )); - } else { - warn!( - "Could'nt get previous_microjoules: {}", - previous_record.value - ); + }, + Err(e) => { + warn!("Couldn't parse previous_microjoules - value : '{}' - error : {:?}", previous_record.value.trim(), e); } - } else { - warn!("Could'nt get last_microjoules: {}", last_record.value); } } None @@ -910,6 +918,7 @@ impl Topology { None } + #[cfg(target_os="linux")] pub fn get_rapl_psys_energy_microjoules(&self) -> Option { if let Some(psys) = self._sensor_data.get("psys") { match &fs::read_to_string(format!("{psys}/energy_uj")) { @@ -924,22 +933,30 @@ impl Topology { warn!("PSYS Error: {:?}", e); } } + } else { + debug!("Asked for PSYS but there is no psys entry in sensor_data."); + } + None + } + + #[cfg(target_os="windows")] + pub unsafe fn get_rapl_psys_energy_microjoules(&self) -> Option { + let msr_addr = msr_rapl::MSR_PLATFORM_ENERGY_STATUS; + match msr_rapl::get_msr_value(0, msr_addr.into(), &self._sensor_data) { + Ok(res) => { + return Some(Record::new( + current_system_time_since_epoch(), + res.value.to_string(), + units::Unit::MicroJoule + )) + }, + Err(e) => { + debug!("get_msr_value returned error : {}", e); + } } None } - //pub fn get_rapl_psys_power_microwatts(&self) -> Option { - // if let Some(psys) = self._sensor_data.get("psys") { - // if let Ok(val) = &fs::read_to_string(format!("{psys}/energy_uj")) { - // return Some(Record::new( - // current_system_time_since_epoch(), - // val.to_string(), - // units::Unit::MicroJoule - // )); - // } - // } - // None - //} } // !!!!!!!!!!!!!!!!! CPUSocket !!!!!!!!!!!!!!!!!!!!!!! diff --git a/src/sensors/msr_rapl.rs b/src/sensors/msr_rapl.rs index 131a7914..3f7097bd 100644 --- a/src/sensors/msr_rapl.rs +++ b/src/sensors/msr_rapl.rs @@ -20,9 +20,9 @@ use windows::Win32::System::SystemInformation::GROUP_AFFINITY; use core_affinity::{self, CoreId}; -use x86::cpuid; +pub use x86::cpuid; // Intel RAPL MSRs -use x86::msr::{ +pub use x86::msr::{ MSR_RAPL_POWER_UNIT, MSR_PKG_POWER_LIMIT, MSR_PKG_POWER_INFO, @@ -33,13 +33,13 @@ use x86::msr::{ MSR_PP0_PERF_STATUS, MSR_PP1_ENERGY_STATUS, }; -const MSR_PLATFORM_ENERGY_STATUS: u32 = 0x0000064d; -const MSR_PLATFORM_POWER_LIMIT: u32 = 0x0000065c ; +pub const MSR_PLATFORM_ENERGY_STATUS: u32 = 0x0000064d; +pub const MSR_PLATFORM_POWER_LIMIT: u32 = 0x0000065c ; // AMD RAPL MSRs -const MSR_AMD_RAPL_POWER_UNIT: u32 = 0xc0010299; -const MSR_AMD_CORE_ENERGY_STATUS: u32 = 0xc001029a; -const MSR_AMD_PKG_ENERGY_STATUS: u32 = 0xc001029b; +pub const MSR_AMD_RAPL_POWER_UNIT: u32 = 0xc0010299; +pub const MSR_AMD_CORE_ENERGY_STATUS: u32 = 0xc001029a; +pub const MSR_AMD_PKG_ENERGY_STATUS: u32 = 0xc001029b; unsafe fn ctl_code(device_type: u32, request_code: u32, method: u32, access: u32) -> u32 { @@ -176,24 +176,32 @@ impl MsrRAPLSensor { impl RecordReader for Topology { fn read_record(&self) -> Result> { - let mut res: u64 = 0; - debug!("Topology: I have {} sockets", self.sockets.len()); - for s in &self.sockets { - match s.read_record() { - Ok(rec) => { - debug!("rec: {:?}", rec); - res = res + rec.value.parse::()?; - }, - Err(e) => { - error!("Failed to get socket record : {:?}", e); + let mut record: Option = None; + unsafe { + record = self.get_rapl_psys_energy_microjoules(); + } + if let Some(psys_record) = record { + Ok(psys_record) + } else { + let mut res: u64 = 0; + debug!("Topology: I have {} sockets", self.sockets.len()); + for s in &self.sockets { + match s.read_record() { + Ok(rec) => { + debug!("rec: {:?}", rec); + res = res + rec.value.parse::()?; + }, + Err(e) => { + error!("Failed to get socket record : {:?}", e); + } } } + Ok(Record { + timestamp: current_system_time_since_epoch(), + unit: super::units::Unit::MicroJoule, + value: res.to_string(), + }) } - Ok(Record { - timestamp: current_system_time_since_epoch(), - unit: super::units::Unit::MicroJoule, - value: res.to_string(), - }) } } @@ -513,8 +521,8 @@ impl Sensor for MsrRAPLSensor { let core_id = s.get_cores_passive().last().unwrap().id + s.id * s.cpu_cores.len() as u16; debug!("Asking get_msr_value, from generate_tpopo, with core_id={}", core_id); match get_msr_value(core_id as usize, MSR_DRAM_ENERGY_STATUS as u64, &sensor_data) { - Ok(rec) => { - debug!("Added domain Dram !"); + Ok(_rec) => { + debug!("Adding domain Dram !"); let mut domain_sensor_data = sensor_data.clone(); domain_sensor_data.insert(String::from("MSR_ADDR"), MSR_DRAM_ENERGY_STATUS.to_string()); domain_sensor_data.insert(String::from("CORE_ID"), core_id.to_string()); // nb of cores in a socket * socket_id + local_core_id @@ -522,12 +530,12 @@ impl Sensor for MsrRAPLSensor { s.safe_add_domain(Domain::new(2, String::from("dram"), String::from(""), 5, domain_sensor_data)) }, Err(e) => { - warn!("Could'nt add Dram domain."); + warn!("Could'nt add Dram domain: {}", e); } } match get_msr_value(core_id as usize, MSR_PP0_ENERGY_STATUS as u64, &sensor_data) { - Ok(rec) => { - debug!("Added domain Core !"); + Ok(_rec) => { + debug!("Adding domain Core !"); let mut domain_sensor_data = sensor_data.clone(); domain_sensor_data.insert(String::from("MSR_ADDR"), MSR_PP0_ENERGY_STATUS.to_string()); domain_sensor_data.insert(String::from("CORE_ID"), core_id.to_string()); @@ -535,12 +543,12 @@ impl Sensor for MsrRAPLSensor { s.safe_add_domain(Domain::new(2, String::from("core"), String::from(""), 5, domain_sensor_data)) }, Err(e) => { - warn!("Could'nt add Core domain."); + warn!("Could'nt add Core domain: {}", e); } } match get_msr_value(core_id as usize, MSR_PP1_ENERGY_STATUS as u64, &sensor_data) { - Ok(rec) => { - debug!("Added domain Uncore !"); + Ok(_rec) => { + debug!("Adding domain Uncore !"); let mut domain_sensor_data = sensor_data.clone(); domain_sensor_data.insert(String::from("MSR_ADDR"), MSR_PP1_ENERGY_STATUS.to_string()); domain_sensor_data.insert(String::from("CORE_ID"), core_id.to_string()); @@ -548,7 +556,7 @@ impl Sensor for MsrRAPLSensor { s.safe_add_domain(Domain::new(2, String::from("uncore"), String::from(""), 5, domain_sensor_data)) }, Err(e) => { - warn!("Could'nt add Uncore domain."); + warn!("Could'nt add Uncore domain: {}", e); } } //match get_msr_value(core_id as usize, MSR_PLATFORM_ENERGY_STATUS as u64, &sensor_data) { @@ -561,6 +569,18 @@ impl Sensor for MsrRAPLSensor { } } + unsafe { + match get_msr_value(0, MSR_PLATFORM_ENERGY_STATUS as u64, &sensor_data) { + Ok(_rec) => { + debug!("Adding domain Platform / PSYS !"); + topology._sensor_data.insert(String::from("psys"), String::from("")); + }, + Err(e) => { + warn!("Could'nt add Uncore domain: {}", e); + } + } + } + topology.set_domains_names(domains); Ok(topology) } @@ -574,7 +594,7 @@ impl Sensor for MsrRAPLSensor { } } -unsafe fn get_msr_value(core_id: usize, msr_addr: u64, sensor_data: &HashMap) -> Result { +pub unsafe fn get_msr_value(core_id: usize, msr_addr: u64, sensor_data: &HashMap) -> Result { let current_process = GetCurrentProcess(); let current_thread = GetCurrentThread(); let mut thread_group_affinity = GROUP_AFFINITY { Mask: 255, Group: 9, Reserved: [0,0,0] }; From 4fec8333f4957846ca11ce39832de7603b975feb Mon Sep 17 00:00:00 2001 From: bpetit Date: Fri, 5 Jan 2024 13:54:26 +0100 Subject: [PATCH 26/33] style: clippy and fmt --- src/exporters/json.rs | 3 +- src/exporters/mod.rs | 18 +- src/exporters/prometheus.rs | 6 +- src/exporters/prometheuspush.rs | 11 +- src/exporters/qemu.rs | 2 +- src/exporters/riemann.rs | 2 +- src/exporters/stdout.rs | 7 +- src/lib.rs | 8 - src/main.rs | 64 +++--- src/sensors/mod.rs | 156 +++++++------- src/sensors/msr_rapl.rs | 346 ++++++++++++++++++++++---------- 11 files changed, 370 insertions(+), 253 deletions(-) diff --git a/src/exporters/json.rs b/src/exporters/json.rs index 6733e46f..c448cd4f 100644 --- a/src/exporters/json.rs +++ b/src/exporters/json.rs @@ -8,7 +8,6 @@ use std::{ path::{Path, PathBuf}, thread, time::{Duration, Instant}, - sync::mpsc::Receiver }; /// An Exporter that writes power consumption data of the host @@ -157,7 +156,7 @@ struct Report { impl Exporter for JsonExporter { /// Runs [iterate()] every `step` until `timeout` - fn run(&mut self, channel: &Receiver) { + fn run(&mut self) { let step = self.time_step; info!("Measurement step is: {step:?}"); diff --git a/src/exporters/mod.rs b/src/exporters/mod.rs index c5e8f427..60c6d857 100644 --- a/src/exporters/mod.rs +++ b/src/exporters/mod.rs @@ -18,14 +18,13 @@ pub mod utils; pub mod warpten; use crate::sensors::{ utils::{current_system_time_since_epoch, IProcess}, - RecordGenerator, Topology, Record + RecordGenerator, Topology, }; use chrono::Utc; use std::collections::HashMap; use std::fmt; use std::time::Duration; use utils::get_scaphandre_version; -use std::sync::mpsc::Receiver; #[cfg(feature = "containers")] use { docker_sync::{container::Container, Docker}, @@ -109,22 +108,10 @@ impl fmt::Debug for MetricValueType { /// with the structs provided by the sensor. pub trait Exporter { /// Runs the exporter. - fn run(&mut self, channel: &Receiver); + fn run(&mut self); /// The name of the kind of the exporter, for example "json". fn kind(&self) -> &str; - - fn watch_signal(&mut self, channel: &Receiver) -> Option { - match channel.try_recv() { - Ok(received) => { - info!("Received signal: {}", received); - Some(1) - }, - Err(_) => { - None - } - } - } } /// MetricGenerator is an exporter helper structure to collect Scaphandre metrics. @@ -643,7 +630,6 @@ impl MetricGenerator { description: String::from("Total swap space on the host, in bytes."), metric_value: MetricValueType::Text(metric_value.value), }); - } /// Generate socket metrics. diff --git a/src/exporters/prometheus.rs b/src/exporters/prometheus.rs index ad0e6150..9159c3a9 100644 --- a/src/exporters/prometheus.rs +++ b/src/exporters/prometheus.rs @@ -5,7 +5,7 @@ //! [scrape](https://prometheus.io/docs/prometheus/latest/getting_started). use super::utils; -use crate::current_system_time_since_epoch; +use crate::sensors::utils::current_system_time_since_epoch; use crate::exporters::{Exporter, MetricGenerator, MetricValueType}; use crate::sensors::{Sensor, Topology}; use chrono::Utc; @@ -16,9 +16,9 @@ use std::{ collections::HashMap, fmt::Write, net::{IpAddr, Ipv4Addr, SocketAddr}, + sync::mpsc::Receiver, sync::{Arc, Mutex}, time::Duration, - sync::mpsc::Receiver }; /// Default ipv4/ipv6 address to expose the service is any @@ -73,7 +73,7 @@ impl PrometheusExporter { impl Exporter for PrometheusExporter { /// Starts an HTTP server to expose the metrics in Prometheus format. - fn run(&mut self, channel: &Receiver) { + fn run(&mut self) { info!( "{}: Starting Prometheus exporter", Utc::now().format("%Y-%m-%dT%H:%M:%S") diff --git a/src/exporters/prometheuspush.rs b/src/exporters/prometheuspush.rs index 0ff558c2..73981e4f 100644 --- a/src/exporters/prometheuspush.rs +++ b/src/exporters/prometheuspush.rs @@ -13,7 +13,6 @@ use isahc::{prelude::*, Request}; use std::fmt::Write; use std::thread; use std::time::Duration; -use std::sync::mpsc::Receiver; pub struct PrometheusPushExporter { topo: Topology, @@ -73,7 +72,7 @@ impl PrometheusPushExporter { } impl Exporter for PrometheusPushExporter { - fn run(&mut self, channel: &Receiver) { + fn run(&mut self) { info!( "{}: Starting Prometheus Push exporter", Utc::now().format("%Y-%m-%dT%H:%M:%S") @@ -97,10 +96,6 @@ impl Exporter for PrometheusPushExporter { ); loop { - if self.watch_signal(channel).is_some() { - info!("Daemon/Service has received a stop signal."); - break; - } metric_generator.topology.refresh(); metric_generator.gen_all_metrics(); let mut body = String::from(""); @@ -159,10 +154,6 @@ impl Exporter for PrometheusPushExporter { } } - if self.watch_signal(channel).is_some() { - info!("Daemon/Service has received a stop signal."); - break; - } thread::sleep(Duration::new(self.args.step, 0)); } } diff --git a/src/exporters/qemu.rs b/src/exporters/qemu.rs index 239293de..829645e6 100644 --- a/src/exporters/qemu.rs +++ b/src/exporters/qemu.rs @@ -1,8 +1,8 @@ use crate::exporters::Exporter; use crate::sensors::Topology; use crate::sensors::{utils::ProcessRecord, Sensor}; -use std::{fs, io, thread, time}; use std::sync::mpsc::Receiver; +use std::{fs, io, thread, time}; /// An Exporter that extracts power consumption data of running /// Qemu/KVM virtual machines on the host and store those data diff --git a/src/exporters/riemann.rs b/src/exporters/riemann.rs index 94843e2e..e2abadac 100644 --- a/src/exporters/riemann.rs +++ b/src/exporters/riemann.rs @@ -10,8 +10,8 @@ use riemann_client::proto::{Attribute, Event}; use riemann_client::Client; use std::collections::HashMap; use std::convert::TryFrom; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; use std::sync::mpsc::Receiver; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; /// Riemann server default ipv4/ipv6 address const DEFAULT_IP_ADDRESS: &str = "localhost"; diff --git a/src/exporters/stdout.rs b/src/exporters/stdout.rs index ad5fe321..ce50c727 100644 --- a/src/exporters/stdout.rs +++ b/src/exporters/stdout.rs @@ -4,7 +4,6 @@ use regex::Regex; use std::fmt::Write; use std::thread; use std::time::{Duration, Instant}; -use std::sync::mpsc::Receiver; /// An Exporter that displays power consumption data of the host /// and its processes on the standard output of the terminal. @@ -54,7 +53,7 @@ pub struct ExporterArgs { impl Exporter for StdoutExporter { /// Runs [iterate()] every `step` until `timeout` - fn run(&mut self, channel: &Receiver) { + fn run(&mut self) { let time_step = Duration::from_secs(self.args.step); let time_limit = if self.args.timeout < 0 { None @@ -118,7 +117,7 @@ impl StdoutExporter { host_power_source = src.to_string() } &m.metric_value - }, + } None => &none_value, }; @@ -151,8 +150,6 @@ impl StdoutExporter { let mut to_print = format!("Socket{socket_id}\t{power_str} W |\t"); - - let domains = metrics.iter().filter(|x| { x.name == "scaph_domain_power_microwatts" && x.attributes.get("socket_id").unwrap() == &socket_id diff --git a/src/lib.rs b/src/lib.rs index 60b65c46..af59dc34 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,8 +14,6 @@ use sensors::msr_rapl; #[cfg(target_os = "linux")] use sensors::powercap_rapl; -use std::time::{Duration, SystemTime}; - /// Create a new [`Sensor`] instance with the default sensor available, /// with its default options. pub fn get_default_sensor() -> impl sensors::Sensor { @@ -30,12 +28,6 @@ pub fn get_default_sensor() -> impl sensors::Sensor { return msr_rapl::MsrRAPLSensor::new(); } -fn current_system_time_since_epoch() -> Duration { - SystemTime::now() - .duration_since(SystemTime::UNIX_EPOCH) - .unwrap() -} - // Copyright 2020 The scaphandre authors. // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/main.rs b/src/main.rs index 187fd3fc..320c0cb9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,6 @@ use clap::{command, ArgAction, Parser, Subcommand}; use colored::Colorize; use scaphandre::{exporters, sensors::Sensor}; -use std::sync::mpsc::{self, Receiver, Sender}; use std::thread; #[cfg(target_os = "linux")] @@ -21,7 +20,7 @@ use windows_service::{ service::ServiceStatus, service::ServiceType, service_control_handler::{self, ServiceControlHandlerResult}, - service_dispatcher, Result, + service_dispatcher }; #[cfg(target_os = "windows")] @@ -113,14 +112,13 @@ enum ExporterChoice { } #[cfg(target_os = "windows")] -fn my_service_main(arguments: Vec) { +fn my_service_main(_arguments: Vec) { use std::thread::JoinHandle; let graceful_period = 3; - let (tx, rx) = mpsc::channel(); let start_status = ServiceStatus { service_type: ServiceType::OWN_PROCESS, // Should match the one from system service registry - current_state: ServiceState::Running, // The new state + current_state: ServiceState::Running, // The new state controls_accepted: ServiceControlAccept::STOP, // Accept stop events when running exit_code: ServiceExitCode::Win32(0), // Used to report an error when starting or stopping only, otherwise must be zero checkpoint: 0, // Only used for pending states, otherwise must be zero @@ -134,7 +132,7 @@ fn my_service_main(arguments: Vec) { exit_code: ServiceExitCode::Win32(0), checkpoint: 0, wait_hint: Duration::default(), - process_id: None + process_id: None, }; let stoppending_status = ServiceStatus { service_type: ServiceType::OWN_PROCESS, @@ -143,18 +141,17 @@ fn my_service_main(arguments: Vec) { exit_code: ServiceExitCode::Win32(0), checkpoint: 0, wait_hint: Duration::from_secs(graceful_period), - process_id: None + process_id: None, }; - let mut thread_handle: Option> = None; - let mut stop = false; + let thread_handle: Option>; + let mut _stop = false; let event_handler = move |control_event| -> ServiceControlHandlerResult { println!("Got service control event: {:?}", control_event); match control_event { ServiceControl::Stop => { // Handle stop event and return control back to the system. - stop = true; - let _ = &tx.send(1); + _stop = true; ServiceControlHandlerResult::NoError } // All services must accept Interrogate even if it's a no-op. @@ -164,27 +161,42 @@ fn my_service_main(arguments: Vec) { }; if let Ok(system_handler) = service_control_handler::register("scaphandre", event_handler) { - // Tell the system that the service is running now and run it + // Tell the system that the service is running now and run it match system_handler.set_service_status(start_status.clone()) { Ok(status_set) => { - println!("Starting main thread, service status has been set: {:?}", status_set); - thread_handle = Some(thread::spawn(move || { parse_cli_and_run_exporter(&rx); })); - }, + println!( + "Starting main thread, service status has been set: {:?}", + status_set + ); + thread_handle = Some(thread::spawn(move || { + parse_cli_and_run_exporter(); + })); + } Err(e) => { panic!("Couldn't set Windows service status. Error: {:?}", e); } } loop { - if stop { + if _stop { // Wait for the thread to finnish, then end the current function match system_handler.set_service_status(stoppending_status.clone()) { Ok(status_set) => { println!("Stop status has been set for service: {:?}", status_set); if let Some(thr) = thread_handle { - if let Ok(_) = thr.join() { + if thr.join().is_ok() { match system_handler.set_service_status(stop_status.clone()) { - Ok(laststatus_set) => {println!("Scaphandre gracefully stopped: {:?}", laststatus_set);}, - Err(e) => {panic!("Could'nt set Stop status on scaphandre service: {:?}", e);} + Ok(laststatus_set) => { + println!( + "Scaphandre gracefully stopped: {:?}", + laststatus_set + ); + } + Err(e) => { + panic!( + "Could'nt set Stop status on scaphandre service: {:?}", + e + ); + } } } else { panic!("Joining the thread failed."); @@ -193,7 +205,7 @@ fn my_service_main(arguments: Vec) { } else { panic!("Thread handle was not initialized."); } - }, + } Err(e) => { panic!("Couldn't set Windows service status. Error: {:?}", e); } @@ -214,12 +226,10 @@ fn main() { } } - let (_, rx) = mpsc::channel(); - - parse_cli_and_run_exporter(&rx); + parse_cli_and_run_exporter(); } -fn parse_cli_and_run_exporter(channel: &Receiver) { +fn parse_cli_and_run_exporter() { let cli = Cli::parse(); loggerv::init_with_verbosity(cli.verbose.into()).expect("unable to initialize the logger"); @@ -229,7 +239,7 @@ fn parse_cli_and_run_exporter(channel: &Receiver) { print_scaphandre_header(exporter.kind()); } - exporter.run(channel); + exporter.run(); } fn build_exporter(choice: ExporterChoice, sensor: &dyn Sensor) -> Box { @@ -280,9 +290,7 @@ fn build_sensor(cli: &Cli) -> impl Sensor { }; #[cfg(target_os = "windows")] - let msr_sensor_win = || { - msr_rapl::MsrRAPLSensor::new() - }; + let msr_sensor_win = msr_rapl::MsrRAPLSensor::new; match cli.sensor.as_deref() { Some("powercap_rapl") => { diff --git a/src/sensors/mod.rs b/src/sensors/mod.rs index fd9f5518..40dd4547 100644 --- a/src/sensors/mod.rs +++ b/src/sensors/mod.rs @@ -5,7 +5,7 @@ #[cfg(target_os = "windows")] pub mod msr_rapl; -#[cfg(target_os="windows")] +#[cfg(target_os = "windows")] use msr_rapl::get_msr_value; #[cfg(target_os = "linux")] pub mod powercap_rapl; @@ -227,13 +227,10 @@ impl Topology { } } - pub fn safe_insert_socket( - &mut self, - socket: CPUSocket - ) { + pub fn safe_insert_socket(&mut self, socket: CPUSocket) { if !self.sockets.iter().any(|s| s.id == socket.id) { self.sockets.push(socket); - } + } } /// Returns a immutable reference to self.proc_tracker @@ -296,53 +293,53 @@ impl Topology { /// Generates CPUCore instances for the host and adds them /// to appropriate CPUSocket instance from self.sockets + #[cfg(target_os = "linux")] pub fn add_cpu_cores(&mut self) { if let Some(mut cores) = Topology::generate_cpu_cores() { - #[cfg(target_os = "linux")] { - while let Some(c) = cores.pop() { - let socket_id = &c - .attributes - .get("physical id") - .unwrap() - .parse::() - .unwrap(); - let socket_match = self.sockets.iter_mut().find(|x| &x.id == socket_id); - - //In VMs there might be a missmatch betwen Sockets and Cores - see Issue#133 as a first fix we just map all cores that can't be mapped to the first - let socket = match socket_match { - Some(x) => x, - None =>self.sockets.first_mut().expect("Trick: if you are running on a vm, do not forget to use --vm parameter invoking scaphandre at the command line") - }; - - if socket_id == &socket.id { - socket.add_cpu_core(c); - } else { - socket.add_cpu_core(c); - warn!("coud't not match core to socket - mapping to first socket instead - if you are not using --vm there is something wrong") - } + while let Some(c) = cores.pop() { + let socket_id = &c + .attributes + .get("physical id") + .unwrap() + .parse::() + .unwrap(); + let socket_match = self.sockets.iter_mut().find(|x| &x.id == socket_id); + + //In VMs there might be a missmatch betwen Sockets and Cores - see Issue#133 as a first fix we just map all cores that can't be mapped to the first + let socket = match socket_match { + Some(x) => x, + None =>self.sockets.first_mut().expect("Trick: if you are running on a vm, do not forget to use --vm parameter invoking scaphandre at the command line") + }; + + if socket_id == &socket.id { + socket.add_cpu_core(c); + } else { + socket.add_cpu_core(c); + warn!("coud't not match core to socket - mapping to first socket instead - if you are not using --vm there is something wrong") } } + //#[cfg(target_os = "windows")] //{ - //TODO: fix - //let nb_sockets = &self.sockets.len(); - //let mut socket_counter = 0; - //let nb_cores_per_socket = &cores.len() / nb_sockets; - //warn!("nb_cores_per_socket: {} cores_len: {} sockets_len: {}", nb_cores_per_socket, &cores.len(), &self.sockets.len()); - //for s in self.sockets.iter_mut() { - // for c in (socket_counter * nb_cores_per_socket)..((socket_counter+1) * nb_cores_per_socket) { - // match cores.pop() { - // Some(core) => { - // warn!("adding core {} to socket {}", core.id, s.id); - // s.add_cpu_core(core); - // }, - // None => { - // error!("Uneven number of CPU cores !"); - // } - // } - // } - // socket_counter = socket_counter + 1; - //} + //TODO: fix + //let nb_sockets = &self.sockets.len(); + //let mut socket_counter = 0; + //let nb_cores_per_socket = &cores.len() / nb_sockets; + //warn!("nb_cores_per_socket: {} cores_len: {} sockets_len: {}", nb_cores_per_socket, &cores.len(), &self.sockets.len()); + //for s in self.sockets.iter_mut() { + // for c in (socket_counter * nb_cores_per_socket)..((socket_counter+1) * nb_cores_per_socket) { + // match cores.pop() { + // Some(core) => { + // warn!("adding core {} to socket {}", core.id, s.id); + // s.add_cpu_core(core); + // }, + // None => { + // error!("Uneven number of CPU cores !"); + // } + // } + // } + // socket_counter = socket_counter + 1; + //} //} } else { panic!("Couldn't retrieve any CPU Core from the topology. (generate_cpu_cores)"); @@ -462,33 +459,34 @@ impl Topology { .get(self.record_buffer.len() - 2) .unwrap(); match previous_record.value.trim().parse::() { - Ok(previous_microjoules) => { - match last_record.value.trim().parse::() { - Ok(last_microjoules) => { - if previous_microjoules > last_microjoules { - return None; - } - let microjoules = last_microjoules - previous_microjoules; - let time_diff = last_record.timestamp.as_secs_f64() - - previous_record.timestamp.as_secs_f64(); - let microwatts = microjoules as f64 / time_diff; - return Some(Record::new( - last_record.timestamp, - (microwatts as u64).to_string(), - units::Unit::MicroWatt, - )); - }, - Err(e) => { - warn!( - "Could'nt get previous_microjoules - value : '{}' - error : {:?}", - previous_record.value, e - ); + Ok(previous_microjoules) => match last_record.value.trim().parse::() { + Ok(last_microjoules) => { + if previous_microjoules > last_microjoules { + return None; } - + let microjoules = last_microjoules - previous_microjoules; + let time_diff = last_record.timestamp.as_secs_f64() + - previous_record.timestamp.as_secs_f64(); + let microwatts = microjoules as f64 / time_diff; + return Some(Record::new( + last_record.timestamp, + (microwatts as u64).to_string(), + units::Unit::MicroWatt, + )); + } + Err(e) => { + warn!( + "Could'nt get previous_microjoules - value : '{}' - error : {:?}", + previous_record.value, e + ); } }, Err(e) => { - warn!("Couldn't parse previous_microjoules - value : '{}' - error : {:?}", previous_record.value.trim(), e); + warn!( + "Couldn't parse previous_microjoules - value : '{}' - error : {:?}", + previous_record.value.trim(), + e + ); } } } @@ -918,7 +916,7 @@ impl Topology { None } - #[cfg(target_os="linux")] + #[cfg(target_os = "linux")] pub fn get_rapl_psys_energy_microjoules(&self) -> Option { if let Some(psys) = self._sensor_data.get("psys") { match &fs::read_to_string(format!("{psys}/energy_uj")) { @@ -939,24 +937,30 @@ impl Topology { None } - #[cfg(target_os="windows")] + /// # Safety + /// + /// This function is unsafe rust as it calls get_msr_value function from msr_rapl sensor module. + /// It calls the msr_RAPL::MSR_PLATFORM_ENERGY_STATUS MSR address, which has been tested on several Intel x86 processors + /// but might fail on AMD (needs testing). That being said, it returns None if the msr query fails (which means if the Windows + /// driver fails.) and should not prevent from using a value coming from elsewhere, which means from another get_msr_value calls + /// targeting another msr address. + #[cfg(target_os = "windows")] pub unsafe fn get_rapl_psys_energy_microjoules(&self) -> Option { let msr_addr = msr_rapl::MSR_PLATFORM_ENERGY_STATUS; - match msr_rapl::get_msr_value(0, msr_addr.into(), &self._sensor_data) { + match get_msr_value(0, msr_addr.into(), &self._sensor_data) { Ok(res) => { return Some(Record::new( current_system_time_since_epoch(), res.value.to_string(), - units::Unit::MicroJoule + units::Unit::MicroJoule, )) - }, + } Err(e) => { debug!("get_msr_value returned error : {}", e); } } None } - } // !!!!!!!!!!!!!!!!! CPUSocket !!!!!!!!!!!!!!!!!!!!!!! diff --git a/src/sensors/msr_rapl.rs b/src/sensors/msr_rapl.rs index 3f7097bd..1a0ae715 100644 --- a/src/sensors/msr_rapl.rs +++ b/src/sensors/msr_rapl.rs @@ -1,47 +1,40 @@ use crate::sensors::utils::current_system_time_since_epoch; -use crate::sensors::{CPUSocket, Domain, Record, RecordReader, Sensor, Topology, CPUCore}; +use crate::sensors::{CPUCore, CPUSocket, Domain, Record, RecordReader, Sensor, Topology}; +use raw_cpuid::{CpuId, TopologyType}; use std::collections::HashMap; use std::error::Error; use std::mem::size_of; -use sysinfo::{System, SystemExt, CpuExt, Cpu}; -use raw_cpuid::{CpuId, TopologyType}; +use sysinfo::{CpuExt, System, SystemExt}; use windows::Win32::Foundation::{CloseHandle, GetLastError, HANDLE, INVALID_HANDLE_VALUE}; use windows::Win32::Storage::FileSystem::{ CreateFileW, FILE_FLAG_OVERLAPPED, FILE_GENERIC_READ, FILE_GENERIC_WRITE, FILE_READ_DATA, FILE_SHARE_READ, FILE_SHARE_WRITE, FILE_WRITE_DATA, OPEN_EXISTING, }; use windows::Win32::System::Ioctl::{FILE_DEVICE_UNKNOWN, METHOD_BUFFERED}; -use windows::Win32::System::IO::DeviceIoControl; +use windows::Win32::System::SystemInformation::GROUP_AFFINITY; use windows::Win32::System::Threading::{ - GetThreadGroupAffinity, GetProcessGroupAffinity, GetCurrentProcess, GetProcessInformation, - GetCurrentThread, GetActiveProcessorGroupCount, SetThreadGroupAffinity + GetActiveProcessorGroupCount, GetCurrentProcess, GetCurrentThread, GetProcessGroupAffinity, + GetThreadGroupAffinity, SetThreadGroupAffinity, }; -use windows::Win32::System::SystemInformation::GROUP_AFFINITY; +use windows::Win32::System::IO::DeviceIoControl; use core_affinity::{self, CoreId}; pub use x86::cpuid; // Intel RAPL MSRs pub use x86::msr::{ + MSR_DRAM_ENERGY_STATUS, MSR_DRAM_PERF_STATUS, MSR_PKG_ENERGY_STATUS, MSR_PKG_POWER_INFO, + MSR_PKG_POWER_LIMIT, MSR_PP0_ENERGY_STATUS, MSR_PP0_PERF_STATUS, MSR_PP1_ENERGY_STATUS, MSR_RAPL_POWER_UNIT, - MSR_PKG_POWER_LIMIT, - MSR_PKG_POWER_INFO, - MSR_PKG_ENERGY_STATUS, - MSR_DRAM_ENERGY_STATUS, - MSR_DRAM_PERF_STATUS, - MSR_PP0_ENERGY_STATUS, - MSR_PP0_PERF_STATUS, - MSR_PP1_ENERGY_STATUS, }; pub const MSR_PLATFORM_ENERGY_STATUS: u32 = 0x0000064d; -pub const MSR_PLATFORM_POWER_LIMIT: u32 = 0x0000065c ; +pub const MSR_PLATFORM_POWER_LIMIT: u32 = 0x0000065c; // AMD RAPL MSRs pub const MSR_AMD_RAPL_POWER_UNIT: u32 = 0xc0010299; pub const MSR_AMD_CORE_ENERGY_STATUS: u32 = 0xc001029a; pub const MSR_AMD_PKG_ENERGY_STATUS: u32 = 0xc001029b; - unsafe fn ctl_code(device_type: u32, request_code: u32, method: u32, access: u32) -> u32 { ((device_type) << 16) | ((access) << 14) | ((request_code) << 2) | (method) } @@ -176,7 +169,7 @@ impl MsrRAPLSensor { impl RecordReader for Topology { fn read_record(&self) -> Result> { - let mut record: Option = None; + let record: Option; unsafe { record = self.get_rapl_psys_energy_microjoules(); } @@ -189,8 +182,8 @@ impl RecordReader for Topology { match s.read_record() { Ok(rec) => { debug!("rec: {:?}", rec); - res = res + rec.value.parse::()?; - }, + res += rec.value.parse::()?; + } Err(e) => { error!("Failed to get socket record : {:?}", e); } @@ -250,32 +243,65 @@ impl RecordReader for CPUSocket { fn read_record(&self) -> Result> { unsafe { let current_thread = GetCurrentThread(); - let processorgroup_id = self.sensor_data.get("PROCESSORGROUP_ID").unwrap().parse::().unwrap(); - let mut thread_group_affinity: GROUP_AFFINITY = GROUP_AFFINITY { Mask: 255, Group: processorgroup_id, Reserved: [0,0,0] }; - let thread_affinity = GetThreadGroupAffinity(current_thread, &mut thread_group_affinity); + let processorgroup_id = self + .sensor_data + .get("PROCESSORGROUP_ID") + .unwrap() + .parse::() + .unwrap(); + let mut thread_group_affinity: GROUP_AFFINITY = GROUP_AFFINITY { + Mask: 255, + Group: processorgroup_id, + Reserved: [0, 0, 0], + }; + let thread_affinity = + GetThreadGroupAffinity(current_thread, &mut thread_group_affinity); if thread_affinity.as_bool() { debug!("got thead_affinity : {:?}", thread_group_affinity); let core_id = self.cpu_cores.last().unwrap().id; //(self.cpu_cores.last().unwrap().id + self.id * self.cpu_cores.len() as u16) as usize - let newaffinity = GROUP_AFFINITY { Mask: (self.cpu_cores.len() + self.id as usize * self.cpu_cores.len() - 1) as usize, Group: processorgroup_id, Reserved: [0, 0, 0]}; - let res = SetThreadGroupAffinity(current_thread, &newaffinity, &mut thread_group_affinity); + let newaffinity = GROUP_AFFINITY { + Mask: self.cpu_cores.len() + self.id as usize * self.cpu_cores.len() - 1, + Group: processorgroup_id, + Reserved: [0, 0, 0], + }; + let res = SetThreadGroupAffinity( + current_thread, + &newaffinity, + &mut thread_group_affinity, + ); if res.as_bool() { - debug!("Asking get_msr_value, from socket, with core_id={}", core_id); - match get_msr_value(core_id as usize, MSR_PKG_ENERGY_STATUS as u64, &self.sensor_data) { + debug!( + "Asking get_msr_value, from socket, with core_id={}", + core_id + ); + match get_msr_value( + core_id as usize, + MSR_PKG_ENERGY_STATUS as u64, + &self.sensor_data, + ) { Ok(rec) => { - return Ok(Record { timestamp: current_system_time_since_epoch(), value: rec.value, unit: super::units::Unit::MicroJoule }) - }, + Ok(Record { + timestamp: current_system_time_since_epoch(), + value: rec.value, + unit: super::units::Unit::MicroJoule, + }) + } Err(e) => { - error!("Could'nt get MSR value for {}: {}", MSR_PKG_ENERGY_STATUS, e); - return Ok(Record { + error!( + "Could'nt get MSR value for {}: {}", + MSR_PKG_ENERGY_STATUS, e + ); + Ok(Record { timestamp: current_system_time_since_epoch(), value: String::from("0"), - unit: super::units::Unit::MicroJoule + unit: super::units::Unit::MicroJoule, }) } } } else { panic!("Couldn't set Thread affinity !"); } + //TODO add DRAM domain to result when available } else { panic!("Coudld'nt get Thread affinity !"); } @@ -289,21 +315,28 @@ impl RecordReader for Domain { debug!("Reading Domain {} on Core {}", self.name, usize_coreid); if let Some(msr_addr) = self.sensor_data.get("MSR_ADDR") { unsafe { - debug!("Asking, from Domain, get_msr_value with core_id={}", usize_coreid); - match get_msr_value(usize_coreid, msr_addr.parse::().unwrap(), &self.sensor_data) { + debug!( + "Asking, from Domain, get_msr_value with core_id={}", + usize_coreid + ); + match get_msr_value( + usize_coreid, + msr_addr.parse::().unwrap(), + &self.sensor_data, + ) { Ok(rec) => { - return Ok(Record { + Ok(Record { timestamp: current_system_time_since_epoch(), unit: super::units::Unit::MicroJoule, value: rec.value, }) - }, + } Err(e) => { error!("Could'nt get MSR value for {}: {}", msr_addr, e); - Ok(Record { + Ok(Record { timestamp: current_system_time_since_epoch(), value: String::from("0"), - unit: super::units::Unit::MicroJoule + unit: super::units::Unit::MicroJoule, }) } } @@ -337,8 +370,7 @@ impl Sensor for MsrRAPLSensor { for group_id in 0..group_count { //TODO fix that to actually count the number of sockets - let logical_cpus = sys.cpus() ; - let mut nb_cpu_sockets: u16 = 0; + let logical_cpus = sys.cpus(); let cpuid = CpuId::new(); let mut logical_cpus_from_cpuid = 1; match cpuid.get_extended_topology_info() { @@ -348,7 +380,7 @@ impl Sensor for MsrRAPLSensor { logical_cpus_from_cpuid = t.processors(); } } - }, + } None => { panic!("Could'nt get cpuid data."); } @@ -359,31 +391,62 @@ impl Sensor for MsrRAPLSensor { let mut i: u16 = 0; let mut no_more_sockets = false; debug!("Entering ProcessorGroup {}", group_id); - let newaffinity = GROUP_AFFINITY { Mask: 255, Group: group_id, Reserved: [0, 0, 0]}; - let mut thread_group_affinity: GROUP_AFFINITY = GROUP_AFFINITY { Mask: 255, Group: 0, Reserved: [0,0,0] }; - let thread_affinity = GetThreadGroupAffinity(current_thread, &mut thread_group_affinity); + let newaffinity = GROUP_AFFINITY { + Mask: 255, + Group: group_id, + Reserved: [0, 0, 0], + }; + let mut thread_group_affinity: GROUP_AFFINITY = GROUP_AFFINITY { + Mask: 255, + Group: 0, + Reserved: [0, 0, 0], + }; + let thread_affinity = + GetThreadGroupAffinity(current_thread, &mut thread_group_affinity); debug!("Thread group affinity result : {:?}", thread_affinity); if thread_affinity.as_bool() { debug!("got thead_affinity : {:?}", thread_group_affinity); - let res = SetThreadGroupAffinity(current_thread, &newaffinity, &mut thread_group_affinity); + let res = SetThreadGroupAffinity( + current_thread, + &newaffinity, + &mut thread_group_affinity, + ); if res.as_bool() { debug!("Have set thread affinity: {:?}", newaffinity); match core_affinity::get_core_ids() { Some(core_ids) => { - debug!("CPU SETUP - Cores from core_affinity, len={} : {:?}", core_ids.len(), core_ids); - debug!("CPU SETUP - Logical CPUs from sysinfo: {}", logical_cpus.len()); + debug!( + "CPU SETUP - Cores from core_affinity, len={} : {:?}", + core_ids.len(), + core_ids + ); + debug!( + "CPU SETUP - Logical CPUs from sysinfo: {}", + logical_cpus.len() + ); while !no_more_sockets { let start = i * logical_cpus_from_cpuid; - let stop = (i+1)*logical_cpus_from_cpuid; + let stop = (i + 1) * logical_cpus_from_cpuid; debug!("Looping over {} .. {}", start, stop); - sensor_data.insert(String::from("PROCESSORGROUP_ID"), group_id.to_string()); - let mut current_socket = CPUSocket::new(i, vec![], vec![], String::from(""),1, sensor_data.clone()); - for c in start..stop {//core_ids { + sensor_data.insert( + String::from("PROCESSORGROUP_ID"), + group_id.to_string(), + ); + let mut current_socket = CPUSocket::new( + i, + vec![], + vec![], + String::from(""), + 1, + sensor_data.clone(), + ); + for c in start..stop { + //core_ids { if core_affinity::set_for_current(CoreId { id: c.into() }) { match cpuid.get_vendor_info() { Some(info) => { debug!("Got CPU {:?}", info); - }, + } None => { warn!("Couldn't get cpuinfo"); } @@ -394,43 +457,76 @@ impl Sensor for MsrRAPLSensor { debug!("Got CPU topo info {:?}", info); for t in info { if t.level_type() == TopologyType::Core { - //nb_cpu_sockets = logical_cpus.len() as u16 / t.processors(); //logical_cpus_from_cpuid = t.processors() let x2apic_id = t.x2apic_id(); let socket_id = (x2apic_id & 240) >> 4; // upper bits of x2apic_id are socket_id, mask them, then bit shift to get socket_id current_socket.set_id(socket_id as u16); let core_id = x2apic_id & 15; // 4 last bits of x2apic_id are the core_id (per-socket) - debug!("Found socketid={} and coreid={}", socket_id, core_id); - let mut attributes = HashMap::::new(); - let ref_core = logical_cpus.first().unwrap(); - attributes.insert(String::from("frequency"), ref_core.frequency().to_string()); - attributes.insert(String::from("name"), ref_core.name().to_string()); - attributes.insert(String::from("vendor_id"), ref_core.vendor_id().to_string()); - attributes.insert(String::from("brand"), ref_core.brand().to_string()); - debug!("Adding core id {} to socket_id {}", ((i * (logical_cpus_from_cpuid - 1)) + core_id as u16), current_socket.id); - current_socket.add_cpu_core(CPUCore::new((i * (logical_cpus_from_cpuid - 1)) + core_id as u16, attributes)); - debug!("Reviewing sockets : {:?}", topology.get_sockets_passive()); + debug!( + "Found socketid={} and coreid={}", + socket_id, core_id + ); + let mut attributes = + HashMap::::new(); + let ref_core = + logical_cpus.first().unwrap(); + attributes.insert( + String::from("frequency"), + ref_core.frequency().to_string(), + ); + attributes.insert( + String::from("name"), + ref_core.name().to_string(), + ); + attributes.insert( + String::from("vendor_id"), + ref_core.vendor_id().to_string(), + ); + attributes.insert( + String::from("brand"), + ref_core.brand().to_string(), + ); + debug!( + "Adding core id {} to socket_id {}", + ((i * (logical_cpus_from_cpuid + - 1)) + + core_id as u16), + current_socket.id + ); + current_socket.add_cpu_core( + CPUCore::new( + (i * (logical_cpus_from_cpuid + - 1)) + + core_id as u16, + attributes, + ), + ); + debug!( + "Reviewing sockets : {:?}", + topology.get_sockets_passive() + ); } } - }, + } None => { warn!("Couldn't get cpu topo info"); } } } else { no_more_sockets = true; - debug!("There's likely to be no more socket to explore."); + debug!( + "There's likely to be no more socket to explore." + ); break; } - } + } if !no_more_sockets { debug!("inserting socket {:?}", current_socket); topology.safe_insert_socket(current_socket); - i = i + 1; + i += 1; } } - nb_cpu_sockets = i; - }, + } None => { panic!("Could'nt get core ids from core_affinity."); } @@ -448,14 +544,13 @@ impl Sensor for MsrRAPLSensor { panic!("Error was : {:?}", last_error); } } else { - panic!("Getting thread group affinity failed !"); + error!("Getting thread group affinity failed !"); let last_error = GetLastError(); panic!("Error was: {:?}", last_error); // win32 error 122 is insufficient buffer } } //let process_information = GetProcessInformation(current_process, , , ); } - //nb_cpu_sockets = logical_cpus.len() as u16 / logical_cpus_from_cpuid; //let mut core_id_counter = logical_cpus.len(); //match cpuid.get_advanced_power_mgmt_info() { @@ -506,29 +601,38 @@ impl Sensor for MsrRAPLSensor { // warn!("Couldn't get cpu capacity info"); // } //} - //TODO: fix - //i=0; - //while i < nb_cpu_sockets { - // //topology.safe_add_domain_to_socket(i, , name, uj_counter, buffer_max_kbytes, sensor_data) - // i = i + 1; - //} //topology.add_cpu_cores(); - let mut domains = vec![]; + let mut domains = vec![]; for s in topology.get_sockets() { debug!("Inspecting CPUSocket: {:?}", s); unsafe { - let core_id = s.get_cores_passive().last().unwrap().id + s.id * s.cpu_cores.len() as u16; - debug!("Asking get_msr_value, from generate_tpopo, with core_id={}", core_id); - match get_msr_value(core_id as usize, MSR_DRAM_ENERGY_STATUS as u64, &sensor_data) { + let core_id = + s.get_cores_passive().last().unwrap().id + s.id * s.cpu_cores.len() as u16; + debug!( + "Asking get_msr_value, from generate_tpopo, with core_id={}", + core_id + ); + match get_msr_value( + core_id as usize, + MSR_DRAM_ENERGY_STATUS as u64, + &sensor_data, + ) { Ok(_rec) => { debug!("Adding domain Dram !"); let mut domain_sensor_data = sensor_data.clone(); - domain_sensor_data.insert(String::from("MSR_ADDR"), MSR_DRAM_ENERGY_STATUS.to_string()); + domain_sensor_data + .insert(String::from("MSR_ADDR"), MSR_DRAM_ENERGY_STATUS.to_string()); domain_sensor_data.insert(String::from("CORE_ID"), core_id.to_string()); // nb of cores in a socket * socket_id + local_core_id domains.push(String::from("dram")); - s.safe_add_domain(Domain::new(2, String::from("dram"), String::from(""), 5, domain_sensor_data)) - }, + s.safe_add_domain(Domain::new( + 2, + String::from("dram"), + String::from(""), + 5, + domain_sensor_data, + )) + } Err(e) => { warn!("Could'nt add Dram domain: {}", e); } @@ -537,11 +641,18 @@ impl Sensor for MsrRAPLSensor { Ok(_rec) => { debug!("Adding domain Core !"); let mut domain_sensor_data = sensor_data.clone(); - domain_sensor_data.insert(String::from("MSR_ADDR"), MSR_PP0_ENERGY_STATUS.to_string()); + domain_sensor_data + .insert(String::from("MSR_ADDR"), MSR_PP0_ENERGY_STATUS.to_string()); domain_sensor_data.insert(String::from("CORE_ID"), core_id.to_string()); domains.push(String::from("core")); - s.safe_add_domain(Domain::new(2, String::from("core"), String::from(""), 5, domain_sensor_data)) - }, + s.safe_add_domain(Domain::new( + 2, + String::from("core"), + String::from(""), + 5, + domain_sensor_data, + )) + } Err(e) => { warn!("Could'nt add Core domain: {}", e); } @@ -550,11 +661,18 @@ impl Sensor for MsrRAPLSensor { Ok(_rec) => { debug!("Adding domain Uncore !"); let mut domain_sensor_data = sensor_data.clone(); - domain_sensor_data.insert(String::from("MSR_ADDR"), MSR_PP1_ENERGY_STATUS.to_string()); + domain_sensor_data + .insert(String::from("MSR_ADDR"), MSR_PP1_ENERGY_STATUS.to_string()); domain_sensor_data.insert(String::from("CORE_ID"), core_id.to_string()); domains.push(String::from("uncore")); - s.safe_add_domain(Domain::new(2, String::from("uncore"), String::from(""), 5, domain_sensor_data)) - }, + s.safe_add_domain(Domain::new( + 2, + String::from("uncore"), + String::from(""), + 5, + domain_sensor_data, + )) + } Err(e) => { warn!("Could'nt add Uncore domain: {}", e); } @@ -573,8 +691,10 @@ impl Sensor for MsrRAPLSensor { match get_msr_value(0, MSR_PLATFORM_ENERGY_STATUS as u64, &sensor_data) { Ok(_rec) => { debug!("Adding domain Platform / PSYS !"); - topology._sensor_data.insert(String::from("psys"), String::from("")); - }, + topology + ._sensor_data + .insert(String::from("psys"), String::from("")); + } Err(e) => { warn!("Could'nt add Uncore domain: {}", e); } @@ -594,19 +714,38 @@ impl Sensor for MsrRAPLSensor { } } -pub unsafe fn get_msr_value(core_id: usize, msr_addr: u64, sensor_data: &HashMap) -> Result { +/// # Safety +/// +/// This function should is unsafe rust as it uses send_request, hence calls a DeviceIO Windows driver. +/// The safety burden actuallr resides in the DeviceIO driver that is called. Please refer to the documentation to +/// get the relationship between Scaphandre and its driver for Windows. The driver should exit smoothly if a wrong +/// MSR address is called, then this function should throw an Error. Any improper issue with the operating system would mean +/// there is an issue in the driver used behind the scene, or the way it is configured. +pub unsafe fn get_msr_value( + core_id: usize, + msr_addr: u64, + sensor_data: &HashMap, +) -> Result { let current_process = GetCurrentProcess(); let current_thread = GetCurrentThread(); - let mut thread_group_affinity = GROUP_AFFINITY { Mask: 255, Group: 9, Reserved: [0,0,0] }; - let thread_affinity_res = GetThreadGroupAffinity(current_thread, &mut thread_group_affinity); + let mut thread_group_affinity = GROUP_AFFINITY { + Mask: 255, + Group: 9, + Reserved: [0, 0, 0], + }; + let thread_affinity_res = GetThreadGroupAffinity(current_thread, &mut thread_group_affinity); if thread_affinity_res.as_bool() { debug!("Thread affinity found : {:?}", thread_group_affinity); } else { error!("Could'nt get thread group affinity"); } - let mut process_group_array: [u16; 8] = [0,0,0,0,0,0,0,0]; + let mut process_group_array: [u16; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; let mut process_group_array_len = 8; - let process_affinity_res = GetProcessGroupAffinity(current_process, &mut process_group_array_len, process_group_array.as_mut_ptr()); + let process_affinity_res = GetProcessGroupAffinity( + current_process, + &mut process_group_array_len, + process_group_array.as_mut_ptr(), + ); if process_affinity_res.as_bool() { debug!("Process affinity found: {:?}", process_group_array); } else { @@ -623,7 +762,7 @@ pub unsafe fn get_msr_value(core_id: usize, msr_addr: u64, sensor_data: &HashMap debug!("core_id: {:b}", ((core_id as u64) << 32)); let src = ((core_id as u64) << 32) | msr_addr; //let src = ((core_id as u64) << 32) | msr_addr; let ptr = &src as *const u64; - + debug!("src: {:x}", src); debug!("src: {:b}", src); debug!("*ptr: {:b}", *ptr); @@ -638,7 +777,7 @@ pub unsafe fn get_msr_value(core_id: usize, msr_addr: u64, sensor_data: &HashMap ptr_result, size_of::(), ) { - Ok(res) => { + Ok(_res) => { close_handle(device); let energy_unit = sensor_data @@ -646,7 +785,8 @@ pub unsafe fn get_msr_value(core_id: usize, msr_addr: u64, sensor_data: &HashMap .unwrap() .parse::() .unwrap(); - let current_value = MsrRAPLSensor::extract_rapl_current_power(msr_result, energy_unit); + let current_value = + MsrRAPLSensor::extract_rapl_current_power(msr_result, energy_unit); debug!("current_value: {}", current_value); Ok(Record { @@ -654,17 +794,17 @@ pub unsafe fn get_msr_value(core_id: usize, msr_addr: u64, sensor_data: &HashMap unit: super::units::Unit::MicroJoule, value: current_value, }) - }, + } Err(e) => { error!("Failed to get data from send_request: {:?}", e); close_handle(device); Err(format!("Failed to get data from send_request: {:?}", e)) } } - }, + } Err(e) => { error!("Couldn't get driver handle : {:?}", e); Err(format!("Couldn't get driver handle : {:?}", e)) } } -} \ No newline at end of file +} From 759ab4c0529eb9099de4702a469556cf4bb2da91 Mon Sep 17 00:00:00 2001 From: Benoit Petit Date: Fri, 5 Jan 2024 15:02:14 +0100 Subject: [PATCH 27/33] style: clippy and fmt --- src/exporters/mod.rs | 1 - src/exporters/prometheus.rs | 3 +-- src/exporters/qemu.rs | 3 +-- src/exporters/riemann.rs | 3 +-- src/main.rs | 3 +-- src/sensors/mod.rs | 4 ++-- src/sensors/msr_rapl.rs | 24 ++++++++++-------------- 7 files changed, 16 insertions(+), 25 deletions(-) diff --git a/src/exporters/mod.rs b/src/exporters/mod.rs index 60c6d857..5b3d750c 100644 --- a/src/exporters/mod.rs +++ b/src/exporters/mod.rs @@ -897,7 +897,6 @@ impl MetricGenerator { Ok(events) => { if !events.is_empty() { self.gen_docker_containers_basic_metadata(); - } else { } } Err(err) => debug!("couldn't get docker events - {:?} - {}", err, err), diff --git a/src/exporters/prometheus.rs b/src/exporters/prometheus.rs index 9159c3a9..29d7cd01 100644 --- a/src/exporters/prometheus.rs +++ b/src/exporters/prometheus.rs @@ -5,8 +5,8 @@ //! [scrape](https://prometheus.io/docs/prometheus/latest/getting_started). use super::utils; -use crate::sensors::utils::current_system_time_since_epoch; use crate::exporters::{Exporter, MetricGenerator, MetricValueType}; +use crate::sensors::utils::current_system_time_since_epoch; use crate::sensors::{Sensor, Topology}; use chrono::Utc; use hyper::service::{make_service_fn, service_fn}; @@ -16,7 +16,6 @@ use std::{ collections::HashMap, fmt::Write, net::{IpAddr, Ipv4Addr, SocketAddr}, - sync::mpsc::Receiver, sync::{Arc, Mutex}, time::Duration, }; diff --git a/src/exporters/qemu.rs b/src/exporters/qemu.rs index 829645e6..de3355e5 100644 --- a/src/exporters/qemu.rs +++ b/src/exporters/qemu.rs @@ -1,7 +1,6 @@ use crate::exporters::Exporter; use crate::sensors::Topology; use crate::sensors::{utils::ProcessRecord, Sensor}; -use std::sync::mpsc::Receiver; use std::{fs, io, thread, time}; /// An Exporter that extracts power consumption data of running @@ -18,7 +17,7 @@ pub struct QemuExporter { impl Exporter for QemuExporter { /// Runs [iterate()] in a loop. - fn run(&mut self, channel: Receiver) { + fn run(&mut self) { info!("Starting qemu exporter"); let path = "/var/lib/libvirt/scaphandre"; let cleaner_step = 120; diff --git a/src/exporters/riemann.rs b/src/exporters/riemann.rs index e2abadac..7635db04 100644 --- a/src/exporters/riemann.rs +++ b/src/exporters/riemann.rs @@ -10,7 +10,6 @@ use riemann_client::proto::{Attribute, Event}; use riemann_client::Client; use std::collections::HashMap; use std::convert::TryFrom; -use std::sync::mpsc::Receiver; use std::time::{Duration, SystemTime, UNIX_EPOCH}; /// Riemann server default ipv4/ipv6 address @@ -169,7 +168,7 @@ impl RiemannExporter { impl Exporter for RiemannExporter { /// Entry point of the RiemannExporter. - fn run(&mut self, channel: &Receiver) { + fn run(&mut self) { info!( "{}: Starting Riemann exporter", Utc::now().format("%Y-%m-%dT%H:%M:%S") diff --git a/src/main.rs b/src/main.rs index 320c0cb9..531e862d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,6 @@ use clap::{command, ArgAction, Parser, Subcommand}; use colored::Colorize; use scaphandre::{exporters, sensors::Sensor}; -use std::thread; #[cfg(target_os = "linux")] use scaphandre::sensors::powercap_rapl; @@ -20,7 +19,7 @@ use windows_service::{ service::ServiceStatus, service::ServiceType, service_control_handler::{self, ServiceControlHandlerResult}, - service_dispatcher + service_dispatcher, }; #[cfg(target_os = "windows")] diff --git a/src/sensors/mod.rs b/src/sensors/mod.rs index 40dd4547..18054b8a 100644 --- a/src/sensors/mod.rs +++ b/src/sensors/mod.rs @@ -318,7 +318,7 @@ impl Topology { warn!("coud't not match core to socket - mapping to first socket instead - if you are not using --vm there is something wrong") } } - + //#[cfg(target_os = "windows")] //{ //TODO: fix @@ -941,7 +941,7 @@ impl Topology { /// /// This function is unsafe rust as it calls get_msr_value function from msr_rapl sensor module. /// It calls the msr_RAPL::MSR_PLATFORM_ENERGY_STATUS MSR address, which has been tested on several Intel x86 processors - /// but might fail on AMD (needs testing). That being said, it returns None if the msr query fails (which means if the Windows + /// but might fail on AMD (needs testing). That being said, it returns None if the msr query fails (which means if the Windows /// driver fails.) and should not prevent from using a value coming from elsewhere, which means from another get_msr_value calls /// targeting another msr address. #[cfg(target_os = "windows")] diff --git a/src/sensors/msr_rapl.rs b/src/sensors/msr_rapl.rs index 1a0ae715..e41426f0 100644 --- a/src/sensors/msr_rapl.rs +++ b/src/sensors/msr_rapl.rs @@ -279,13 +279,11 @@ impl RecordReader for CPUSocket { MSR_PKG_ENERGY_STATUS as u64, &self.sensor_data, ) { - Ok(rec) => { - Ok(Record { - timestamp: current_system_time_since_epoch(), - value: rec.value, - unit: super::units::Unit::MicroJoule, - }) - } + Ok(rec) => Ok(Record { + timestamp: current_system_time_since_epoch(), + value: rec.value, + unit: super::units::Unit::MicroJoule, + }), Err(e) => { error!( "Could'nt get MSR value for {}: {}", @@ -324,13 +322,11 @@ impl RecordReader for Domain { msr_addr.parse::().unwrap(), &self.sensor_data, ) { - Ok(rec) => { - Ok(Record { - timestamp: current_system_time_since_epoch(), - unit: super::units::Unit::MicroJoule, - value: rec.value, - }) - } + Ok(rec) => Ok(Record { + timestamp: current_system_time_since_epoch(), + unit: super::units::Unit::MicroJoule, + value: rec.value, + }), Err(e) => { error!("Could'nt get MSR value for {}: {}", msr_addr, e); Ok(Record { From 839d660ad3558a9c643676d23d7386424548bbcd Mon Sep 17 00:00:00 2001 From: bpetit Date: Fri, 5 Jan 2024 15:33:56 +0100 Subject: [PATCH 28/33] style: clippy --- src/main.rs | 2 +- src/sensors/utils.rs | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main.rs b/src/main.rs index 531e862d..f1d68881 100644 --- a/src/main.rs +++ b/src/main.rs @@ -167,7 +167,7 @@ fn my_service_main(_arguments: Vec) { "Starting main thread, service status has been set: {:?}", status_set ); - thread_handle = Some(thread::spawn(move || { + thread_handle = Some(std::thread::spawn(move || { parse_cli_and_run_exporter(); })); } diff --git a/src/sensors/utils.rs b/src/sensors/utils.rs index ceac1cf2..2081b1b4 100644 --- a/src/sensors/utils.rs +++ b/src/sensors/utils.rs @@ -324,7 +324,7 @@ impl ProcessTracker { // check if the previous records in the vector are from the same process // (if the process with that pid is not a new one) and if so, drop it for a new one if !vector.is_empty() - && process_record.process.comm != vector.get(0).unwrap().process.comm + && process_record.process.comm != vector.first().unwrap().process.comm { *vector = vec![]; } @@ -627,12 +627,12 @@ impl ProcessTracker { let mut result = self .procs .iter() - .filter(|x| !x.is_empty() && x.get(0).unwrap().process.pid == pid); + .filter(|x| !x.is_empty() && x.first().unwrap().process.pid == pid); let process = result.next().unwrap(); if result.next().is_some() { panic!("Found two vectors of processes with the same id, maintainers should fix this."); } - process.get(0).unwrap().process.comm.clone() + process.first().unwrap().process.comm.clone() } /// Returns the cmdline string associated to a PID @@ -640,9 +640,9 @@ impl ProcessTracker { let mut result = self .procs .iter() - .filter(|x| !x.is_empty() && x.get(0).unwrap().process.pid == pid); + .filter(|x| !x.is_empty() && x.first().unwrap().process.pid == pid); let process = result.next().unwrap(); - if let Some(p) = process.get(0) { + if let Some(p) = process.first() { let cmdline_request = p.process.cmdline(self); if let Ok(mut cmdline_vec) = cmdline_request { let mut cmdline = String::from(""); From 402fdacd1b48752296458d866c2904b6f5adbdf1 Mon Sep 17 00:00:00 2001 From: bpetit Date: Fri, 5 Jan 2024 17:10:20 +0100 Subject: [PATCH 29/33] feat: adding dram to sum of sockets PKG in host total power, when psys is unavailable, on windows --- src/sensors/msr_rapl.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/sensors/msr_rapl.rs b/src/sensors/msr_rapl.rs index e41426f0..8b212f1b 100644 --- a/src/sensors/msr_rapl.rs +++ b/src/sensors/msr_rapl.rs @@ -176,18 +176,28 @@ impl RecordReader for Topology { if let Some(psys_record) = record { Ok(psys_record) } else { - let mut res: u64 = 0; + let mut res: u128 = 0; debug!("Topology: I have {} sockets", self.sockets.len()); for s in &self.sockets { match s.read_record() { Ok(rec) => { debug!("rec: {:?}", rec); - res += rec.value.parse::()?; + res += rec.value.trim().parse::()?; } Err(e) => { error!("Failed to get socket record : {:?}", e); } } + let dram_filter: Vec<&Domain> = s + .get_domains_passive() + .iter() + .filter(|d| d.name == "dram") + .collect(); + if let Some(dram) = dram_filter.first() { + if let Ok(val) = dram.read_record() { + res += val.value.trim().parse::()?; + } + } } Ok(Record { timestamp: current_system_time_since_epoch(), From 94115c6211478a832599c952927d06c12fd6d92e Mon Sep 17 00:00:00 2001 From: bpetit Date: Fri, 12 Jan 2024 17:03:27 +0100 Subject: [PATCH 30/33] chore: lowering log level for fail cases, as it might happen on first metric collection --- src/sensors/msr_rapl.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sensors/msr_rapl.rs b/src/sensors/msr_rapl.rs index 8b212f1b..75596a50 100644 --- a/src/sensors/msr_rapl.rs +++ b/src/sensors/msr_rapl.rs @@ -185,7 +185,7 @@ impl RecordReader for Topology { res += rec.value.trim().parse::()?; } Err(e) => { - error!("Failed to get socket record : {:?}", e); + warn!("Failed to get socket record : {:?}", e); } } let dram_filter: Vec<&Domain> = s @@ -245,7 +245,7 @@ unsafe fn send_request( info!("Device answered"); Ok(String::from("Device answered !")) } else { - error!("DeviceIoControl failed"); + info!("DeviceIoControl failed"); Err(String::from("DeviceIoControl failed")) } } @@ -802,7 +802,7 @@ pub unsafe fn get_msr_value( }) } Err(e) => { - error!("Failed to get data from send_request: {:?}", e); + info!("Failed to get data from send_request: {:?}", e); close_handle(device); Err(format!("Failed to get data from send_request: {:?}", e)) } From 7739309840b8bc251c28c886511a91de1a1f2dac Mon Sep 17 00:00:00 2001 From: Benoit Petit Date: Sat, 27 Jan 2024 11:14:04 +0100 Subject: [PATCH 31/33] fix: removed useless check fails in CI + style --- src/sensors/msr_rapl.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/sensors/msr_rapl.rs b/src/sensors/msr_rapl.rs index 8b212f1b..33271742 100644 --- a/src/sensors/msr_rapl.rs +++ b/src/sensors/msr_rapl.rs @@ -391,9 +391,6 @@ impl Sensor for MsrRAPLSensor { panic!("Could'nt get cpuid data."); } } - if logical_cpus_from_cpuid <= 1 { - panic!("CpuID data is likely to be wrong."); - } let mut i: u16 = 0; let mut no_more_sockets = false; debug!("Entering ProcessorGroup {}", group_id); From fd4244e98af532a20f2ba7783c3ec759a201ff1d Mon Sep 17 00:00:00 2001 From: bpetit Date: Tue, 30 Jan 2024 16:30:26 +0100 Subject: [PATCH 32/33] ci: disabling tests that can't run on a GH virtual machine style: fmt ci: disabling tests that can't run on a GH virtual machine ci: fix ci: fix ci: fix ci: fix ci: fix ci: fix ci: fixing path for devcon.exe in exe build workflow ci: fix ci: fix ci: fix ci: fix --- .github/workflows/build-and-test.yml | 4 +- .../workflows/exe-release-prometheuspush.yml | 17 +-- packaging/windows/installer.iss | 5 +- src/sensors/msr_rapl.rs | 101 ++++++++++-------- 4 files changed, 61 insertions(+), 66 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index c75ebcf7..eb7a4267 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -171,7 +171,7 @@ jobs: rustup toolchain install stable-x86_64-pc-windows-msvc - name: Tests run: | - cargo test --no-default-features --features "prometheus json riemann" + cargo test --no-default-features --features "prometheus prometheuspush json riemann" exporters - name: Build (debug mode) run: | - cargo build --no-default-features --features "prometheus json riemann" + cargo build --no-default-features --features "prometheus prometheuspush json riemann" diff --git a/.github/workflows/exe-release-prometheuspush.yml b/.github/workflows/exe-release-prometheuspush.yml index aded447c..d7ffd4f7 100644 --- a/.github/workflows/exe-release-prometheuspush.yml +++ b/.github/workflows/exe-release-prometheuspush.yml @@ -12,7 +12,7 @@ on: branches: [ '336-proper-handling-of-windows-service-management' ] env: - WRD_VERSION: v0.0.3 + WRD_VERSION: v0.0.4 WRD_BASE_URL: https://github.com/hubblo-org/windows-rapl-driver/releases/download jobs: @@ -35,7 +35,6 @@ jobs: run: | $dest = "DriverLoader.exe" $url = "${{ env.WRD_BASE_URL }}/${{ env.WRD_VERSION }}/DriverLoader.exe" - echo ($url -replace '"', "") Invoke-WebRequest -Uri ($url -replace '"', "") -OutFile $dest $dest = "ScaphandreDrv.cat" $url = "${{ env.WRD_BASE_URL }}/${{ env.WRD_VERSION }}/ScaphandreDrv.cat" @@ -46,16 +45,6 @@ jobs: $dest = "ScaphandreDrv.inf" $url = "${{ env.WRD_BASE_URL }}/${{ env.WRD_VERSION }}/ScaphandreDrv.inf" Invoke-WebRequest -Uri ($url -replace '"', "") -OutFile $dest - $dest = "ScaphandreDrvTest.cer" - $url = "${{ env.WRD_BASE_URL }}/${{ env.WRD_VERSION }}/ScaphandreDrvTest.cer" - Invoke-WebRequest -Uri ($url -replace '"', "") -OutFile $dest - $dest = "devcon.exe" - $url = "${{ env.WRD_BASE_URL }}/${{ env.WRD_VERSION }}/devcon.exe" - Invoke-WebRequest -Uri ($url -replace '"', "") -OutFile $dest - $dest = "certmgr.exe" - $url = "${{ env.WRD_BASE_URL }}/${{ env.WRD_VERSION }}/certmgr.exe" - Invoke-WebRequest -Uri ($url -replace '"', "") -OutFile $dest - ls - name: Install Rustup uses: crazy-max/ghaction-chocolatey@v2 with: @@ -72,10 +61,10 @@ jobs: - name: Upload artifact #Install-PackageProvider -Name NuGet -MinimumVersion 2.8.5.201 -Force run: | Set-PSRepository -Name 'PSGallery' -InstallationPolicy Trusted - Install-Module -Confirm:$False -Name AWS.Tools.Installer + Install-Module -Confirm:$False -Name AWS.Tools.Installer Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope LocalMachine Import-Module AWS.Tools.Installer - Install-AWSToolsModule AWS.Tools.EC2,AWS.Tools.S3 -CleanUp -Confirm:$False + Install-AWSToolsModule AWS.Tools.EC2,AWS.Tools.S3 -CleanUp -Confirm:$False -AllowClobber Set-AWSCredential -AccessKey ${{ secrets.S3_ACCESS_KEY_ID }} -SecretKey ${{ secrets.S3_SECRET_ACCESS_KEY }} -StoreAs default mv packaging/windows/Output/scaphandre_installer.exe scaphandre_${{ github.ref_name }}_installer.exe $clientconfig=@{ diff --git a/packaging/windows/installer.iss b/packaging/windows/installer.iss index bc5287ac..4817f2a9 100644 --- a/packaging/windows/installer.iss +++ b/packaging/windows/installer.iss @@ -45,11 +45,10 @@ Source: "../../ScaphandreDrv.sys"; DestDir: "{app}"; Source: "../../ScaphandreDrv.cat"; DestDir: "{app}"; ; Source: "../../ScaphandreDrv.cat"; DestDir: "{#SystemFolder}"; ; Source: "../../ScaphandreDrv.cat"; DestDir: "{#System64Folder}"; -Source: "../../devcon.exe"; DestDir: "{app}"; Flags: ignoreversion -Source: "../../certmgr.exe"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\Program Files (x86)\Windows Kits\10\Tools\10.0.22621.0\x64\devcon.exe"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64\certmgr.exe"; DestDir: "{app}"; Flags: ignoreversion Source: "../../README.md"; DestDir: "{app}"; Flags: ignoreversion Source: "../../CHANGELOG.md"; DestDir: "{app}"; Flags: ignoreversion -Source: "../../ScaphandreDrvTest.cer"; DestDir: "{app}"; Flags: ignoreversion ; NOTE: Don't use "Flags: ignoreversion" on any shared system files [Icons] diff --git a/src/sensors/msr_rapl.rs b/src/sensors/msr_rapl.rs index e4533006..36dd1395 100644 --- a/src/sensors/msr_rapl.rs +++ b/src/sensors/msr_rapl.rs @@ -756,58 +756,65 @@ pub unsafe fn get_msr_value( error!("Error was : {:?}", GetLastError()); } debug!("Core ID requested to the driver : {}", core_id); - match get_handle(sensor_data.get("DRIVER_NAME").unwrap()) { - Ok(device) => { - let mut msr_result: u64 = 0; - let ptr_result = &mut msr_result as *mut u64; - debug!("msr_addr: {:b}", msr_addr); - debug!("core_id: {:x} {:b}", (core_id as u64), (core_id as u64)); - debug!("core_id: {:b}", ((core_id as u64) << 32)); - let src = ((core_id as u64) << 32) | msr_addr; //let src = ((core_id as u64) << 32) | msr_addr; - let ptr = &src as *const u64; - - debug!("src: {:x}", src); - debug!("src: {:b}", src); - debug!("*ptr: {:b}", *ptr); - //warn!("*ptr: {}", *ptr); - //warn!("*ptr: {:b}", *ptr); - - match send_request( - device, - MSR_PKG_ENERGY_STATUS, - ptr, - 8, - ptr_result, - size_of::(), - ) { - Ok(_res) => { - close_handle(device); - - let energy_unit = sensor_data - .get("ENERGY_UNIT") - .unwrap() - .parse::() - .unwrap(); - let current_value = - MsrRAPLSensor::extract_rapl_current_power(msr_result, energy_unit); - debug!("current_value: {}", current_value); - - Ok(Record { - timestamp: current_system_time_since_epoch(), - unit: super::units::Unit::MicroJoule, - value: current_value, - }) + match sensor_data.get("DRIVER_NAME") { + Some(driver) => { + match get_handle(driver) { + Ok(device) => { + let mut msr_result: u64 = 0; + let ptr_result = &mut msr_result as *mut u64; + debug!("msr_addr: {:b}", msr_addr); + debug!("core_id: {:x} {:b}", (core_id as u64), (core_id as u64)); + debug!("core_id: {:b}", ((core_id as u64) << 32)); + let src = ((core_id as u64) << 32) | msr_addr; //let src = ((core_id as u64) << 32) | msr_addr; + let ptr = &src as *const u64; + + debug!("src: {:x}", src); + debug!("src: {:b}", src); + debug!("*ptr: {:b}", *ptr); + //warn!("*ptr: {}", *ptr); + //warn!("*ptr: {:b}", *ptr); + + match send_request( + device, + MSR_PKG_ENERGY_STATUS, + ptr, + 8, + ptr_result, + size_of::(), + ) { + Ok(_res) => { + close_handle(device); + + let energy_unit = sensor_data + .get("ENERGY_UNIT") + .unwrap() + .parse::() + .unwrap(); + let current_value = + MsrRAPLSensor::extract_rapl_current_power(msr_result, energy_unit); + debug!("current_value: {}", current_value); + + Ok(Record { + timestamp: current_system_time_since_epoch(), + unit: super::units::Unit::MicroJoule, + value: current_value, + }) + } + Err(e) => { + info!("Failed to get data from send_request: {:?}", e); + close_handle(device); + Err(format!("Failed to get data from send_request: {:?}", e)) + } + } } Err(e) => { - info!("Failed to get data from send_request: {:?}", e); - close_handle(device); - Err(format!("Failed to get data from send_request: {:?}", e)) + error!("Couldn't get driver handle : {:?}", e); + Err(format!("Couldn't get driver handle : {:?}", e)) } } } - Err(e) => { - error!("Couldn't get driver handle : {:?}", e); - Err(format!("Couldn't get driver handle : {:?}", e)) + None => { + panic!("DRIVER_NAME not set."); } } } From 75b1de64e7463b65b802ed6ea4841b1fa57bec41 Mon Sep 17 00:00:00 2001 From: bpetit Date: Fri, 9 Feb 2024 17:52:14 +0100 Subject: [PATCH 33/33] ci: fix --- .github/workflows/build-and-test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index eb7a4267..db6d6261 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -10,6 +10,7 @@ on: - 'CITATION' - 'book.toml' - 'CONTRIBUTING.md' + - '.github/workflows/exe-release-prometheuspush.yml' pull_request: branches: [ main, dev ] paths-ignore: @@ -18,6 +19,7 @@ on: - 'CHANGELOG.md' - 'CITATION' - 'book.toml' + - '.github/workflows/exe-release-prometheuspush.yml' env: CARGO_TERM_COLOR: always