From ccb959097da0709ac38c4f5439f4f539b2e59a5d Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Wed, 3 Sep 2025 14:54:09 -0700 Subject: [PATCH 1/7] rudimentary PSC sequencer ereports --- Cargo.lock | 7 + drv/i2c-devices/src/mwocp68.rs | 2 +- drv/psc-seq-server/Cargo.toml | 9 +- drv/psc-seq-server/build.rs | 7 + drv/psc-seq-server/src/main.rs | 373 +++++++++++++++++++++++++++++---- 5 files changed, 354 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a97dce8ece..a3cdcea9fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1988,13 +1988,20 @@ dependencies = [ name = "drv-psc-seq-server" version = "0.1.0" dependencies = [ + "build-i2c", "build-util", + "drv-i2c-api", + "drv-i2c-devices", "drv-packrat-vpd-loader", "drv-psc-seq-api", "drv-stm32xx-sys-api", "idol", + "minicbor 2.1.1", + "minicbor-serde 0.6.0", "ringbuf", + "serde", "task-jefe-api", + "task-packrat-api", "userlib", ] diff --git a/drv/i2c-devices/src/mwocp68.rs b/drv/i2c-devices/src/mwocp68.rs index 516612014d..bbd8fb640b 100644 --- a/drv/i2c-devices/src/mwocp68.rs +++ b/drv/i2c-devices/src/mwocp68.rs @@ -30,7 +30,7 @@ pub struct Mwocp68 { #[derive(Copy, Clone, PartialEq)] pub struct FirmwareRev(pub [u8; 4]); -#[derive(Copy, Clone, PartialEq, Default)] +#[derive(Copy, Clone, PartialEq, Eq, Default)] pub struct SerialNumber(pub [u8; 12]); // diff --git a/drv/psc-seq-server/Cargo.toml b/drv/psc-seq-server/Cargo.toml index 6f58ff22cd..e727f3ffe8 100644 --- a/drv/psc-seq-server/Cargo.toml +++ b/drv/psc-seq-server/Cargo.toml @@ -7,13 +7,20 @@ edition = "2021" drv-packrat-vpd-loader.path = "../packrat-vpd-loader" drv-psc-seq-api.path = "../psc-seq-api" drv-stm32xx-sys-api = { path = "../../drv/stm32xx-sys-api", features = ["family-stm32h7"] } +drv-i2c-api = { path ="../../drv/i2c-api", features = ["component-id"] } +drv-i2c-devices.path = "../../drv/i2c-devices" task-jefe-api.path = "../../task/jefe-api" +task-packrat-api.path = "../../task/packrat-api" userlib = { path = "../../sys/userlib", features = ["panic-messages"] } ringbuf = { path = "../../lib/ringbuf" } +serde.workspace = true +minicbor.workspace = true +minicbor-serde.workspace = true [build-dependencies] idol.workspace = true -build-util = {path = "../../build/util"} +build-util = { path = "../../build/util" } +build-i2c = { path = "../../build/i2c" } # This section is here to discourage RLS/rust-analyzer from doing test builds, # since test builds don't work for cross compilation. diff --git a/drv/psc-seq-server/build.rs b/drv/psc-seq-server/build.rs index d57bdbf7be..19a69a848d 100644 --- a/drv/psc-seq-server/build.rs +++ b/drv/psc-seq-server/build.rs @@ -4,5 +4,12 @@ fn main() -> Result<(), Box> { build_util::build_notifications()?; + + let disposition = build_i2c::Disposition::Devices; + + if let Err(e) = build_i2c::codegen(disposition) { + println!("cargo::error=code generation failed: {e}"); + std::process::exit(1); + } Ok(()) } diff --git a/drv/psc-seq-server/src/main.rs b/drv/psc-seq-server/src/main.rs index 1b082216fb..78d8667ec0 100644 --- a/drv/psc-seq-server/src/main.rs +++ b/drv/psc-seq-server/src/main.rs @@ -99,6 +99,11 @@ #![no_std] #![no_main] +use drv_i2c_api::{self as i2c, I2cDevice}; +use drv_i2c_devices::{ + m24c02::M24C02, + mwocp68::{self, Mwocp68}, +}; use drv_packrat_vpd_loader::{read_vpd_and_load_packrat, Packrat}; use drv_psc_seq_api::PowerState; use drv_stm32xx_sys_api as sys_api; @@ -121,11 +126,13 @@ enum Trace { /// detect it as "on" due to the pull resistors.) FoundEnabled { psu: u8, + serial: Option, }, /// Emitted at task startup when we find that a power supply appears to have /// been disabled. FoundAlreadyDisabled { psu: u8, + serial: Option, }, /// Emitted when we decide a power supply should be on. Enabling { @@ -138,6 +145,25 @@ enum Trace { psu: u8, present: bool, }, + I2cError { + psu: u8, + txn: I2cTxn, + err: mwocp68::Error, + }, + EreportSentOff { + psu: u8, + class: ereport::Class, + len: usize, + }, + EreportTooBig { + psu: u8, + class: ereport::Class, + }, +} + +#[derive(Copy, Clone, PartialEq, Eq)] +enum I2cTxn { + ReadSerial, } ringbuf!((u64, Trace), 128, (0, Trace::Empty)); @@ -187,6 +213,16 @@ const PSU_PWR_OK_NOTIF: [u32; PSU_COUNT] = [ notifications::PSU_PWR_OK_6_MASK, ]; +/// In order to get the PMBus devices by PSU index, we need a little lookup table guy. +const PSU_PMBUS_DEVS: [fn(TaskId) -> (I2cDevice, u8); PSU_COUNT] = [ + i2c_config::pmbus::v54_psu0 as fn(TaskId) -> (I2cDevice, u8), + i2c_config::pmbus::v54_psu1 as fn(TaskId) -> (I2cDevice, u8), + i2c_config::pmbus::v54_psu2 as fn(TaskId) -> (I2cDevice, u8), + i2c_config::pmbus::v54_psu3 as fn(TaskId) -> (I2cDevice, u8), + i2c_config::pmbus::v54_psu4 as fn(TaskId) -> (I2cDevice, u8), + i2c_config::pmbus::v54_psu5 as fn(TaskId) -> (I2cDevice, u8), +]; + /// How long to wait after task startup before we start trying to inspect /// things. const STARTUP_SETTLE_MS: u64 = 500; // Current value is somewhat arbitrary. @@ -231,8 +267,7 @@ enum PsuState { /// The PSU is detected as not present. In this state, we cannot trust the /// OK signal, and we deassert the ENABLE signal. NotPresent, - /// The PSU is detected as present. We assume this at powerup until proven - /// otherwise. + /// The PSU is detected as present. Present(PresentState), } @@ -379,28 +414,63 @@ fn main() -> ! { // Turn on pin change notifications on all of our input nets. sys.gpio_irq_configure(all_pin_notifications, Edge::Both); - // Set up our state machines for each PSU. The initial state is always set - // as present, and only the fault state is set based on our sensing of the - // ON lines. This lets the normal logic used for handling absence and faults - // in the loop below also handle the startup case. + // Set up our state machines for each PSU. We'll need to read the presence + // pins to determine whether a PSU is present and if we should ask it for + // its serial number. + let present_l_bits = sys.gpio_read(ALL_PSU_PRESENT_L_PINS); let start_time = sys_get_timer().now; - let psu_states: [PsuState; PSU_COUNT] = core::array::from_fn(|i| { - PsuState::Present(if initial_psu_enabled[i] { - ringbuf_entry!((start_time, Trace::FoundEnabled { psu: i as u8 })); - PresentState::On + + let mut psus: [Psu; PSU_COUNT] = core::array::from_fn(|i| { + let dev = { + let i2c = I2C.get_task_id(); + let make_dev = PSU_PMBUS_DEVS[i]; + let (dev, rail) = make_dev(i2c); + Mwocp68::new(&dev, rail) + }; + let (state, serial) = if present_l_bits & (1 << PSU_PRESENT_L_PINS[i]) + == 0 + { + // Hello, who are you? + let serial = + retry_i2c_txn(start_time, i as u8, I2cTxn::ReadSerial, || { + dev.serial_number() + }) + .ok(); + // ...and how are you doing? + let state = PsuState::Present(if initial_psu_enabled[i] { + ringbuf_entry!(( + start_time, + Trace::FoundEnabled { + psu: i as u8, + serial + } + )); + PresentState::On + } else { + // PSU was forced off by our previous incarnation. Schedule it to + // turn back on in the future if things clear up. + ringbuf_entry!(( + start_time, + Trace::FoundAlreadyDisabled { + psu: i as u8, + serial + } + )); + PresentState::Faulted { + turn_on_deadline: start_time.saturating_add(FAULT_OFF_MS), + } + }); + (state, serial) } else { - // PSU was forced off by our previous incarnation. Schedule it to - // turn back on in the future if things clear up. - ringbuf_entry!(( - start_time, - Trace::FoundAlreadyDisabled { psu: i as u8 } - )); - PresentState::Faulted { - turn_on_deadline: start_time.saturating_add(FAULT_OFF_MS), - } - }) + (PsuState::NotPresent, None) + }; + Psu { + slot: i as u8, + state, + dev, + serial, + } }); - let mut psus = psu_states.map(|state| Psu { state }); // Turn the chassis LED on to indicate that we're alive. sys.gpio_set(STATUS_LED); @@ -432,7 +502,8 @@ fn main() -> ! { } else { Status::NotGood }; - match psus[i].step(now, present, ok) { + let step = psus[i].step(now, present, ok); + match step.action { None => (), Some(ActionRequired::EnableMe) => { @@ -465,6 +536,9 @@ fn main() -> ! { ); } } + if let Some(ereport) = step.ereport { + ereport.deliver(&packrat, now); + } } // Wait for a pin change or timer. @@ -497,7 +571,11 @@ enum Status { } struct Psu { + slot: u8, state: PsuState, + dev: Mwocp68, + serial: Option, + // eeprom: M24C02, } impl Psu { @@ -507,16 +585,11 @@ impl Psu { /// This may be called at unpredictable intervals, and may be called more /// than once for the same timestamp value. The implementation **must** use /// `now` and the timer to control any time-sensitive operations. - fn step( - &mut self, - now: u64, - present: Present, - pwr_ok: Status, - ) -> Option { + fn step(&mut self, now: u64, present: Present, pwr_ok: Status) -> Step { match (self.state, present, pwr_ok) { (PsuState::NotPresent, Present::No, _) => { // ignore the power good line, it is meaningless. - None + Step::default() } // Regardless of our current state, if we observe the present line @@ -526,10 +599,24 @@ impl Psu { // decision is that the "NewlyInserted" settle time starts after the // contacts are _done_ scraping, not when they start. (_, Present::No, _) => { + let ereport = ereport::Ereport { + class: ereport::Class::Removed, + v: 0, + dev_id: self.dev.i2c_device().component_id(), + psu: self.psu_id(), + pmbus_status: None, + }; + self.state = PsuState::NotPresent; - Some(ActionRequired::DisableMe { - attempt_snapshot: false, - }) + // Clear the cached serial only *after* we have put it in the ereport. + self.serial = None; + + Step { + action: Some(ActionRequired::DisableMe { + attempt_snapshot: false, + }), + ereport: Some(ereport), + } } // In a not-present situation we have to ignore the OK line entirely @@ -540,8 +627,10 @@ impl Psu { self.state = PsuState::Present(PresentState::NewlyInserted { settle_deadline, }); + // Hello, who are you? + self.update_serial(now); // No external action required until our timer elapses. - None + Step::default() } ( @@ -551,21 +640,36 @@ impl Psu { _, _, ) => { + self.update_serial(now); if settle_deadline <= now { // The PSU is still present (since the Present::No case above // didn't fire) and our deadline has elapsed. Let's treat this // as valid! self.state = PsuState::Present(PresentState::On); - Some(ActionRequired::EnableMe) + let ereport = ereport::Ereport { + class: ereport::Class::Inserted, + v: 0, + dev_id: self.dev.i2c_device().component_id(), + psu: self.psu_id(), + pmbus_status: None, + }; + + Step { + action: Some(ActionRequired::EnableMe), + ereport: Some(ereport), + } } else { // Remain in this state. - None + Step::default() } } // yay! - (PsuState::Present(PresentState::On), _, Status::Good) => None, + (PsuState::Present(PresentState::On), _, Status::Good) => { + self.update_serial(now); + Step::default() + } (PsuState::Present(PresentState::On), _, Status::NotGood) => { // The PSU appears to have pulled the OK signal into the "not // OK" state to indicate an internal fault! @@ -574,9 +678,20 @@ impl Psu { self.state = PsuState::Present(PresentState::Faulted { turn_on_deadline, }); - Some(ActionRequired::DisableMe { - attempt_snapshot: true, - }) + let ereport = ereport::Ereport { + class: ereport::Class::Fault, + v: 0, + dev_id: self.dev.i2c_device().component_id(), + psu: self.psu_id(), + pmbus_status: None, // TODO(eliza) + }; + + Step { + action: Some(ActionRequired::DisableMe { + attempt_snapshot: true, + }), + ereport: Some(ereport), + } } ( @@ -591,10 +706,13 @@ impl Psu { self.state = PsuState::Present(PresentState::OnProbation { deadline: now.saturating_add(PROBATION_MS), }); - Some(ActionRequired::EnableMe) + Step { + action: Some(ActionRequired::EnableMe), + ereport: None, + } } else { // Remain in this state. - None + Step::default() } } ( @@ -606,13 +724,184 @@ impl Psu { // Take PSU out of probation state and start monitoring its // OK line. self.state = PsuState::Present(PresentState::On); + let ereport = ereport::Ereport { + class: ereport::Class::FaultCleared, + v: 0, + dev_id: self.dev.i2c_device().component_id(), + psu: self.psu_id(), + pmbus_status: None, // TODO(eliza) + }; + Step { + ereport: Some(ereport), + action: None, + } } else { // Remain in this state. + Step::default() + } + } + } + } + + fn update_serial(&mut self, now: u64) { + if self.serial.is_some() { + return; + } + self.serial = retry_i2c_txn(now, self.slot, I2cTxn::ReadSerial, || { + self.dev.serial_number() + }) + .ok(); + } + + fn psu_id(&self) -> ereport::Psu { + ereport::Psu { + serial: self.serial, + slot: self.slot, + } + } +} + +#[derive(Default)] +struct Step { + action: Option, + ereport: Option, +} + +fn retry_i2c_txn( + now: u64, + psu: u8, + which: I2cTxn, + mut txn: impl FnMut() -> Result, +) -> Result { + // Chosen by fair dice roll, seems reasonable-ish? + let mut retries_remaining = 3; + loop { + match txn() { + Ok(x) => return Ok(x), + Err(err) => { + ringbuf_entry!(( + now, + Trace::I2cError { + psu, + txn: which, + err + } + )); + + if retries_remaining == 0 { + // ringbuf_entry!((now, Trace::I2cFault { psu, txn })); + return Err(err); } - None + + // ringbuf_entry!(Trace::I2cRetry { + // psu, + // txn, + // retries_remaining + // }); + + retries_remaining -= 1; } } } } include!(concat!(env!("OUT_DIR"), "/notifications.rs")); + +include!(concat!(env!("OUT_DIR"), "/i2c_config.rs")); + +mod ereport { + use super::*; + use serde::Serialize; + + #[derive(Copy, Clone, Eq, PartialEq, Serialize)] + pub(super) enum Class { + #[serde(rename = "psu.insert")] + Inserted, + #[serde(rename = "psu.remove")] + Removed, + #[serde(rename = "psu.fault")] + Fault, + #[serde(rename = "psu.fault_cleared")] + FaultCleared, + } + + #[derive(Copy, Clone, Serialize)] + pub(super) struct Ereport { + #[serde(rename = "k")] + pub(super) class: Class, + pub(super) v: u32, + pub(super) dev_id: &'static str, + pub(super) psu: Psu, + #[serde(skip_serializing_if = "Option::is_none")] + pub(super) pmbus_status: Option, + } + + impl Ereport { + // This is in its own function so that the ereport buffer and `Ereport` struct + // are only on the stack while we're using it, and not for the entireity of + // `record_pmbus_status`, which calls into a bunch of other functions. This may + // reduce our stack depth a bit. + #[inline(never)] + pub(super) fn deliver(&self, packrat: &Packrat, now: u64) { + let mut ereport_buf = [0u8; 256]; + let writer = + minicbor::encode::write::Cursor::new(&mut ereport_buf[..]); + let mut s = minicbor_serde::Serializer::new(writer); + match self.serialize(&mut s) { + Ok(_) => { + let len = s.into_encoder().into_writer().position(); + packrat.deliver_ereport(&ereport_buf[..len]); + ringbuf_entry!(( + now, + Trace::EreportSentOff { + psu: self.psu.slot, + class: self.class, + len, + } + )); + } + Err(_) => { + // XXX(eliza): ereport didn't fit in buffer...what do + ringbuf_entry!(( + now, + Trace::EreportTooBig { + psu: self.psu.slot, + class: self.class + } + )); + } + } + } + } + + #[derive(Copy, Clone, Serialize)] + pub(super) struct Psu { + pub(super) slot: u8, + #[serde(serialize_with = "serialize_serial")] + pub(super) serial: Option, + } + + #[derive(Copy, Clone, Default, Serialize)] + pub(super) struct PmbusStatus { + pub(super) word: Option, + pub(super) input: Option, + pub(super) iout: Option, + pub(super) vout: Option, + pub(super) temp: Option, + pub(super) cml: Option, + pub(super) mfr: Option, + } + + fn serialize_serial( + serial: &Option, + serializer: S, + ) -> Result + where + S: serde::Serializer, + { + serial + .as_ref() + .and_then(|mwocp68::SerialNumber(s)| str::from_utf8(s).ok()) + .serialize(serializer) + } +} From aa2a804c67bbb57e0f9920042ee80dc54c7b76d1 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Thu, 4 Sep 2025 10:30:10 -0700 Subject: [PATCH 2/7] read *all* FRU ID values --- drv/i2c-devices/src/mwocp68.rs | 44 +++++++++ drv/psc-seq-server/src/main.rs | 174 +++++++++++++++++---------------- 2 files changed, 135 insertions(+), 83 deletions(-) diff --git a/drv/i2c-devices/src/mwocp68.rs b/drv/i2c-devices/src/mwocp68.rs index bbd8fb640b..5ac67bd06c 100644 --- a/drv/i2c-devices/src/mwocp68.rs +++ b/drv/i2c-devices/src/mwocp68.rs @@ -33,6 +33,20 @@ pub struct FirmwareRev(pub [u8; 4]); #[derive(Copy, Clone, PartialEq, Eq, Default)] pub struct SerialNumber(pub [u8; 12]); +/// Manufacturer model number. +/// +/// Per Murata Application Note ACAN-114.A01.D03 "PMBus Communication Protocol", +/// this is always a 17-byte ASCII string. It should be "MWOCP68-3600-D-RM". +#[derive(Copy, Clone, PartialEq, Eq, Default)] +pub struct ModelNumber(pub [u8; 17]); + +/// Manufacturer ID. +/// +/// Per Murata Application Note ACAN-114.A01.D03 "PMBus Communication Protocol", +/// this is always a 9-byte ASCII string. It should be "Murata-PS". +#[derive(Copy, Clone, PartialEq, Eq, Default)] +pub struct MfrId(pub [u8; 9]); + // // The boot loader command -- sent via BOOT_LOADER_CMD -- is unfortunately odd // in that its command code is overloaded with BOOT_LOADER_STATUS. (That is, @@ -89,6 +103,12 @@ pub enum Error { code: ResponseCode, }, ChecksumNotSuccessful, + BadModelNumberRead { + code: ResponseCode, + }, + BadMfrIdRead { + code: ResponseCode, + }, } impl From for Error { @@ -535,6 +555,30 @@ impl Mwocp68 { Ok(serial) } + /// + /// Returns the manufacturer model number of the PSU. + /// + pub fn model_number(&self) -> Result { + let mut model = ModelNumber::default(); + let _ = self + .device + .read_block(CommandCode::MFR_MODEL as u8, &mut model.0) + .map_err(|code| Error::BadModelNumberRead { code })?; + Ok(model) + } + + /// + /// Returns the manufacturer ID of the PSU. + /// + pub fn mfr_id(&self) -> Result { + let mut id = MfrId::default(); + let _ = self + .device + .read_block(CommandCode::MFR_ID as u8, &mut id.0) + .map_err(|code| Error::BadMfrIdRead { code })?; + Ok(id) + } + fn get_boot_loader_status( &self, ) -> Result { diff --git a/drv/psc-seq-server/src/main.rs b/drv/psc-seq-server/src/main.rs index 78d8667ec0..4ab57c1425 100644 --- a/drv/psc-seq-server/src/main.rs +++ b/drv/psc-seq-server/src/main.rs @@ -99,11 +99,8 @@ #![no_std] #![no_main] -use drv_i2c_api::{self as i2c, I2cDevice}; -use drv_i2c_devices::{ - m24c02::M24C02, - mwocp68::{self, Mwocp68}, -}; +use drv_i2c_api::I2cDevice; +use drv_i2c_devices::mwocp68::{self, Mwocp68}; use drv_packrat_vpd_loader::{read_vpd_and_load_packrat, Packrat}; use drv_psc_seq_api::PowerState; use drv_stm32xx_sys_api as sys_api; @@ -126,13 +123,13 @@ enum Trace { /// detect it as "on" due to the pull resistors.) FoundEnabled { psu: u8, - serial: Option, + serial: Option<[u8; 12]>, }, /// Emitted at task startup when we find that a power supply appears to have /// been disabled. FoundAlreadyDisabled { psu: u8, - serial: Option, + serial: Option<[u8; 12]>, }, /// Emitted when we decide a power supply should be on. Enabling { @@ -147,7 +144,6 @@ enum Trace { }, I2cError { psu: u8, - txn: I2cTxn, err: mwocp68::Error, }, EreportSentOff { @@ -161,11 +157,6 @@ enum Trace { }, } -#[derive(Copy, Clone, PartialEq, Eq)] -enum I2cTxn { - ReadSerial, -} - ringbuf!((u64, Trace), 128, (0, Trace::Empty)); const STATUS_LED: sys_api::PinSet = sys_api::Port::A.pin(3); @@ -427,22 +418,17 @@ fn main() -> ! { let (dev, rail) = make_dev(i2c); Mwocp68::new(&dev, rail) }; - let (state, serial) = if present_l_bits & (1 << PSU_PRESENT_L_PINS[i]) - == 0 - { + let mut fruid = PsuFruid::default(); + let state = if present_l_bits & (1 << PSU_PRESENT_L_PINS[i]) == 0 { // Hello, who are you? - let serial = - retry_i2c_txn(start_time, i as u8, I2cTxn::ReadSerial, || { - dev.serial_number() - }) - .ok(); + fruid.refresh(&dev, i as u8, start_time); // ...and how are you doing? let state = PsuState::Present(if initial_psu_enabled[i] { ringbuf_entry!(( start_time, Trace::FoundEnabled { psu: i as u8, - serial + serial: fruid.serial } )); PresentState::On @@ -453,22 +439,22 @@ fn main() -> ! { start_time, Trace::FoundAlreadyDisabled { psu: i as u8, - serial + serial: fruid.serial } )); PresentState::Faulted { turn_on_deadline: start_time.saturating_add(FAULT_OFF_MS), } }); - (state, serial) + state } else { - (PsuState::NotPresent, None) + PsuState::NotPresent }; Psu { slot: i as u8, state, dev, - serial, + fruid, } }); @@ -574,8 +560,10 @@ struct Psu { slot: u8, state: PsuState, dev: Mwocp68, - serial: Option, - // eeprom: M24C02, + /// Because we would like to include the PSU's FRU ID information in the + /// ereports generated when a PSU is *removed*, we must cache it here rather + /// than reading it from the device when we generate an ereport for it. + fruid: PsuFruid, } impl Psu { @@ -601,15 +589,16 @@ impl Psu { (_, Present::No, _) => { let ereport = ereport::Ereport { class: ereport::Class::Removed, - v: 0, + version: 0, dev_id: self.dev.i2c_device().component_id(), - psu: self.psu_id(), + psu_slot: self.slot, + fruid: self.fruid, pmbus_status: None, }; self.state = PsuState::NotPresent; - // Clear the cached serial only *after* we have put it in the ereport. - self.serial = None; + // Clear the FRUID serial only *after* we have put it in the ereport. + self.fruid = PsuFruid::default(); Step { action: Some(ActionRequired::DisableMe { @@ -628,7 +617,7 @@ impl Psu { settle_deadline, }); // Hello, who are you? - self.update_serial(now); + self.fruid.refresh(&self.dev, self.slot, now); // No external action required until our timer elapses. Step::default() } @@ -640,7 +629,8 @@ impl Psu { _, _, ) => { - self.update_serial(now); + // Hello, who are you? + self.fruid.refresh(&self.dev, self.slot, now); if settle_deadline <= now { // The PSU is still present (since the Present::No case above // didn't fire) and our deadline has elapsed. Let's treat this @@ -648,9 +638,10 @@ impl Psu { self.state = PsuState::Present(PresentState::On); let ereport = ereport::Ereport { class: ereport::Class::Inserted, - v: 0, + version: 0, dev_id: self.dev.i2c_device().component_id(), - psu: self.psu_id(), + psu_slot: self.slot, + fruid: self.fruid, pmbus_status: None, }; @@ -666,7 +657,9 @@ impl Psu { // yay! (PsuState::Present(PresentState::On), _, Status::Good) => { - self.update_serial(now); + // Just in case we were previously unable to read any FRUID + // values due to I2C weather, try to refresh them + self.fruid.refresh(&self.dev, self.slot, now); Step::default() } @@ -680,9 +673,10 @@ impl Psu { }); let ereport = ereport::Ereport { class: ereport::Class::Fault, - v: 0, + version: 0, dev_id: self.dev.i2c_device().component_id(), - psu: self.psu_id(), + psu_slot: self.slot, + fruid: self.fruid, pmbus_status: None, // TODO(eliza) }; @@ -720,15 +714,19 @@ impl Psu { _, _, ) => { + // Just in case we were previously unable to read any FRUID + // values due to I2C weather, try to refresh them + self.fruid.refresh(&self.dev, self.slot, now); if deadline <= now { // Take PSU out of probation state and start monitoring its // OK line. self.state = PsuState::Present(PresentState::On); let ereport = ereport::Ereport { class: ereport::Class::FaultCleared, - v: 0, + version: 0, dev_id: self.dev.i2c_device().component_id(), - psu: self.psu_id(), + psu_slot: self.slot, + fruid: self.fruid, pmbus_status: None, // TODO(eliza) }; Step { @@ -742,23 +740,6 @@ impl Psu { } } } - - fn update_serial(&mut self, now: u64) { - if self.serial.is_some() { - return; - } - self.serial = retry_i2c_txn(now, self.slot, I2cTxn::ReadSerial, || { - self.dev.serial_number() - }) - .ok(); - } - - fn psu_id(&self) -> ereport::Psu { - ereport::Psu { - serial: self.serial, - slot: self.slot, - } - } } #[derive(Default)] @@ -767,10 +748,48 @@ struct Step { ereport: Option, } +#[derive(Copy, Clone, serde::Serialize, Default)] +struct PsuFruid { + #[serde(serialize_with = "ereport::serialize_fixed_str")] + mfr: Option<[u8; 9]>, + #[serde(serialize_with = "ereport::serialize_fixed_str")] + mpn: Option<[u8; 17]>, + #[serde(serialize_with = "ereport::serialize_fixed_str")] + serial: Option<[u8; 12]>, + #[serde(serialize_with = "ereport::serialize_fixed_str")] + fw_rev: Option<[u8; 4]>, +} + +impl PsuFruid { + fn refresh(&mut self, dev: &Mwocp68, psu: u8, now: u64) { + if self.mfr.is_none() { + self.mfr = + retry_i2c_txn(now, psu, || dev.mfr_id()).ok().map(|v| v.0); + } + + if self.serial.is_none() { + self.serial = retry_i2c_txn(now, psu, || dev.serial_number()) + .ok() + .map(|v| v.0); + } + + if self.mpn.is_none() { + self.mpn = retry_i2c_txn(now, psu, || dev.model_number()) + .ok() + .map(|v| v.0); + } + + if self.fw_rev.is_none() { + self.fw_rev = retry_i2c_txn(now, psu, || dev.firmware_revision()) + .ok() + .map(|v| v.0); + } + } +} + fn retry_i2c_txn( now: u64, psu: u8, - which: I2cTxn, mut txn: impl FnMut() -> Result, ) -> Result { // Chosen by fair dice roll, seems reasonable-ish? @@ -779,14 +798,7 @@ fn retry_i2c_txn( match txn() { Ok(x) => return Ok(x), Err(err) => { - ringbuf_entry!(( - now, - Trace::I2cError { - psu, - txn: which, - err - } - )); + ringbuf_entry!((now, Trace::I2cError { psu, err })); if retries_remaining == 0 { // ringbuf_entry!((now, Trace::I2cFault { psu, txn })); @@ -829,9 +841,11 @@ mod ereport { pub(super) struct Ereport { #[serde(rename = "k")] pub(super) class: Class, - pub(super) v: u32, + #[serde(rename = "v")] + pub(super) version: u32, pub(super) dev_id: &'static str, - pub(super) psu: Psu, + pub(super) psu_slot: u8, + pub(super) fruid: PsuFruid, #[serde(skip_serializing_if = "Option::is_none")] pub(super) pmbus_status: Option, } @@ -854,7 +868,7 @@ mod ereport { ringbuf_entry!(( now, Trace::EreportSentOff { - psu: self.psu.slot, + psu: self.psu_slot, class: self.class, len, } @@ -865,7 +879,7 @@ mod ereport { ringbuf_entry!(( now, Trace::EreportTooBig { - psu: self.psu.slot, + psu: self.psu_slot, class: self.class } )); @@ -874,13 +888,6 @@ mod ereport { } } - #[derive(Copy, Clone, Serialize)] - pub(super) struct Psu { - pub(super) slot: u8, - #[serde(serialize_with = "serialize_serial")] - pub(super) serial: Option, - } - #[derive(Copy, Clone, Default, Serialize)] pub(super) struct PmbusStatus { pub(super) word: Option, @@ -892,16 +899,17 @@ mod ereport { pub(super) mfr: Option, } - fn serialize_serial( - serial: &Option, + /// XXX(eliza): A "fixed length byte string" helper would be a nice thing to + /// have... + pub(super) fn serialize_fixed_str( + s: &Option<[u8; LEN]>, serializer: S, ) -> Result where S: serde::Serializer, { - serial - .as_ref() - .and_then(|mwocp68::SerialNumber(s)| str::from_utf8(s).ok()) + s.as_ref() + .and_then(|s| str::from_utf8(&s[..]).ok()) .serialize(serializer) } } From f7db591c5dc032ae758158dc4a481375f4491d78 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Thu, 4 Sep 2025 10:48:32 -0700 Subject: [PATCH 3/7] read PMBus status on faults --- drv/i2c-devices/src/mwocp68.rs | 35 +++++++++++++++++++++++++ drv/psc-seq-server/src/main.rs | 48 ++++++++++++++++++++++++++++++---- 2 files changed, 78 insertions(+), 5 deletions(-) diff --git a/drv/i2c-devices/src/mwocp68.rs b/drv/i2c-devices/src/mwocp68.rs index 5ac67bd06c..e89a98439d 100644 --- a/drv/i2c-devices/src/mwocp68.rs +++ b/drv/i2c-devices/src/mwocp68.rs @@ -579,6 +579,41 @@ impl Mwocp68 { Ok(id) } + pub fn status_word(&self) -> Result { + // Per ACAN-114, this is always on page 0. + pmbus_rail_read!(self.device, 0, STATUS_WORD) + } + + pub fn status_iout(&self) -> Result { + // Per ACAN-114, this is always on page 0. + pmbus_rail_read!(self.device, 0, STATUS_IOUT) + } + + pub fn status_vout(&self) -> Result { + // Per ACAN-114, this is always on page 0. + pmbus_rail_read!(self.device, 0, STATUS_VOUT) + } + + pub fn status_input(&self) -> Result { + pmbus_read!(self.device, STATUS_INPUT) + } + + pub fn status_cml(&self) -> Result { + pmbus_read!(self.device, STATUS_CML) + } + + pub fn status_temperature( + &self, + ) -> Result { + pmbus_read!(self.device, STATUS_TEMPERATURE) + } + + pub fn status_mfr_specific( + &self, + ) -> Result { + pmbus_read!(self.device, STATUS_MFR_SPECIFIC) + } + fn get_boot_loader_status( &self, ) -> Result { diff --git a/drv/psc-seq-server/src/main.rs b/drv/psc-seq-server/src/main.rs index 4ab57c1425..a30a067999 100644 --- a/drv/psc-seq-server/src/main.rs +++ b/drv/psc-seq-server/src/main.rs @@ -423,7 +423,7 @@ fn main() -> ! { // Hello, who are you? fruid.refresh(&dev, i as u8, start_time); // ...and how are you doing? - let state = PsuState::Present(if initial_psu_enabled[i] { + PsuState::Present(if initial_psu_enabled[i] { ringbuf_entry!(( start_time, Trace::FoundEnabled { @@ -445,8 +445,7 @@ fn main() -> ! { PresentState::Faulted { turn_on_deadline: start_time.saturating_add(FAULT_OFF_MS), } - }); - state + }) } else { PsuState::NotPresent }; @@ -677,7 +676,7 @@ impl Psu { dev_id: self.dev.i2c_device().component_id(), psu_slot: self.slot, fruid: self.fruid, - pmbus_status: None, // TODO(eliza) + pmbus_status: Some(self.read_pmbus_status(now)), }; Step { @@ -727,7 +726,7 @@ impl Psu { dev_id: self.dev.i2c_device().component_id(), psu_slot: self.slot, fruid: self.fruid, - pmbus_status: None, // TODO(eliza) + pmbus_status: Some(self.read_pmbus_status(now)), }; Step { ereport: Some(ereport), @@ -740,6 +739,45 @@ impl Psu { } } } + + fn refresh_fruid(&mut self, now: u64) { + self.fruid.refresh(&self.dev, self.slot, now); + } + + fn read_pmbus_status(&mut self, now: u64) -> ereport::PmbusStatus { + let word = retry_i2c_txn(now, self.slot, || self.dev.status_word()) + .map(|data| data.0) + .ok(); + let iout = retry_i2c_txn(now, self.slot, || self.dev.status_iout()) + .map(|data| data.0) + .ok(); + let vout = retry_i2c_txn(now, self.slot, || self.dev.status_vout()) + .map(|data| data.0) + .ok(); + let input = retry_i2c_txn(now, self.slot, || self.dev.status_vout()) + .map(|data| data.0) + .ok(); + let cml = retry_i2c_txn(now, self.slot, || self.dev.status_cml()) + .map(|data| data.0) + .ok(); + let temp = + retry_i2c_txn(now, self.slot, || self.dev.status_temperature()) + .map(|data| data.0) + .ok(); + let mfr = + retry_i2c_txn(now, self.slot, || self.dev.status_mfr_specific()) + .map(|data| data.0) + .ok(); + ereport::PmbusStatus { + word, + iout, + vout, + input, + cml, + temp, + mfr, + } + } } #[derive(Default)] From 391370305cce8ca98aedc7603b40a1d44adb8eb9 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Thu, 4 Sep 2025 11:12:43 -0700 Subject: [PATCH 4/7] put more verbose debug traces in their own ringbuf --- drv/psc-seq-server/src/main.rs | 228 +++++++++++++++++++++++++-------- 1 file changed, 176 insertions(+), 52 deletions(-) diff --git a/drv/psc-seq-server/src/main.rs b/drv/psc-seq-server/src/main.rs index a30a067999..d8b7ebb6b4 100644 --- a/drv/psc-seq-server/src/main.rs +++ b/drv/psc-seq-server/src/main.rs @@ -116,7 +116,7 @@ task_slot!(JEFE, jefe); task_slot!(PACKRAT, packrat); #[derive(Copy, Clone, PartialEq, Eq)] -enum Trace { +enum Event { Empty, /// Emitted at task startup when we find that a power supply is probably /// already on. (Note that if the power supply is not present, we will still @@ -131,6 +131,15 @@ enum Trace { psu: u8, serial: Option<[u8; 12]>, }, + /// Emitted when a previously not present PSU's presence pin is asserted. + Inserted { + psu: u8, + serial: Option<[u8; 12]>, + }, + /// Emitted when a previously present PSU's presence pin is deasserted. + Removed { + psu: u8, + }, /// Emitted when we decide a power supply should be on. Enabling { psu: u8, @@ -142,22 +151,73 @@ enum Trace { psu: u8, present: bool, }, +} + +ringbuf!((u64, Event), 128, (0, Event::Empty)); + +/// More verbose debugging data goes in its own ring buffer, so that we can +/// maintain a longer history of major PSU events while still recording more +/// detailed information about the PSU's status. +/// +/// Each of these entries has a `now` value which can be correlated with the +/// timestamps in the main ringbuf. +#[derive(Copy, Clone, PartialEq, Eq)] +enum Trace { + None, + StatusWord { + now: u64, + psu: u8, + status_word: Result, + }, + StatusIout { + now: u64, + psu: u8, + status_iout: Result, + }, + StatusVout { + now: u64, + psu: u8, + status_vout: Result, + }, + StatusInput { + now: u64, + psu: u8, + status_input: Result, + }, + StatusCml { + now: u64, + psu: u8, + status_cml: Result, + }, + StatusTemperature { + now: u64, + psu: u8, + status_temperature: Result, + }, + StatusMfrSpecific { + now: u64, + psu: u8, + status_mfr_specific: Result, + }, I2cError { + now: u64, psu: u8, err: mwocp68::Error, }, EreportSentOff { + now: u64, psu: u8, class: ereport::Class, len: usize, }, EreportTooBig { + now: u64, psu: u8, class: ereport::Class, }, } -ringbuf!((u64, Trace), 128, (0, Trace::Empty)); +ringbuf!(__TRACE, Trace, 32, Trace::None); const STATUS_LED: sys_api::PinSet = sys_api::Port::A.pin(3); @@ -426,7 +486,7 @@ fn main() -> ! { PsuState::Present(if initial_psu_enabled[i] { ringbuf_entry!(( start_time, - Trace::FoundEnabled { + Event::FoundEnabled { psu: i as u8, serial: fruid.serial } @@ -437,7 +497,7 @@ fn main() -> ! { // turn back on in the future if things clear up. ringbuf_entry!(( start_time, - Trace::FoundAlreadyDisabled { + Event::FoundAlreadyDisabled { psu: i as u8, serial: fruid.serial } @@ -492,7 +552,7 @@ fn main() -> ! { None => (), Some(ActionRequired::EnableMe) => { - ringbuf_entry!((now, Trace::Enabling { psu: i as u8 })); + ringbuf_entry!((now, Event::Enabling { psu: i as u8 })); // Enable the PSU by allowing `ENABLE_L` to float low, by no // longer asserting high. sys.gpio_configure_input( @@ -506,7 +566,7 @@ fn main() -> ! { } ringbuf_entry!(( now, - Trace::Disabling { + Event::Disabling { psu: i as u8, present: attempt_snapshot, } @@ -586,6 +646,7 @@ impl Psu { // decision is that the "NewlyInserted" settle time starts after the // contacts are _done_ scraping, not when they start. (_, Present::No, _) => { + ringbuf_entry!((now, Event::Removed { psu: self.slot })); let ereport = ereport::Ereport { class: ereport::Class::Removed, version: 0, @@ -616,7 +677,15 @@ impl Psu { settle_deadline, }); // Hello, who are you? - self.fruid.refresh(&self.dev, self.slot, now); + self.fruid = PsuFruid::default(); + self.refresh_fruid(now); + ringbuf_entry!(( + now, + Event::Inserted { + psu: self.slot, + serial: self.fruid.serial + } + )); // No external action required until our timer elapses. Step::default() } @@ -629,7 +698,7 @@ impl Psu { _, ) => { // Hello, who are you? - self.fruid.refresh(&self.dev, self.slot, now); + self.refresh_fruid(now); if settle_deadline <= now { // The PSU is still present (since the Present::No case above // didn't fire) and our deadline has elapsed. Let's treat this @@ -658,7 +727,7 @@ impl Psu { (PsuState::Present(PresentState::On), _, Status::Good) => { // Just in case we were previously unable to read any FRUID // values due to I2C weather, try to refresh them - self.fruid.refresh(&self.dev, self.slot, now); + self.refresh_fruid(now); Step::default() } @@ -715,7 +784,7 @@ impl Psu { ) => { // Just in case we were previously unable to read any FRUID // values due to I2C weather, try to refresh them - self.fruid.refresh(&self.dev, self.slot, now); + self.refresh_fruid(now); if deadline <= now { // Take PSU out of probation state and start monitoring its // OK line. @@ -745,37 +814,97 @@ impl Psu { } fn read_pmbus_status(&mut self, now: u64) -> ereport::PmbusStatus { - let word = retry_i2c_txn(now, self.slot, || self.dev.status_word()) - .map(|data| data.0) - .ok(); - let iout = retry_i2c_txn(now, self.slot, || self.dev.status_iout()) - .map(|data| data.0) - .ok(); - let vout = retry_i2c_txn(now, self.slot, || self.dev.status_vout()) - .map(|data| data.0) - .ok(); - let input = retry_i2c_txn(now, self.slot, || self.dev.status_vout()) - .map(|data| data.0) - .ok(); - let cml = retry_i2c_txn(now, self.slot, || self.dev.status_cml()) - .map(|data| data.0) - .ok(); - let temp = + let status_word = + retry_i2c_txn(now, self.slot, || self.dev.status_word()) + .map(|data| data.0); + ringbuf_entry!( + __TRACE, + Trace::StatusWord { + psu: self.slot, + now, + status_word + } + ); + + let status_iout = + retry_i2c_txn(now, self.slot, || self.dev.status_iout()) + .map(|data| data.0); + ringbuf_entry!( + __TRACE, + Trace::StatusIout { + psu: self.slot, + now, + status_iout + } + ); + + let status_vout = + retry_i2c_txn(now, self.slot, || self.dev.status_vout()) + .map(|data| data.0); + ringbuf_entry!( + __TRACE, + Trace::StatusVout { + psu: self.slot, + now, + status_vout + } + ); + let status_input = + retry_i2c_txn(now, self.slot, || self.dev.status_vout()) + .map(|data| data.0); + ringbuf_entry!( + __TRACE, + Trace::StatusInput { + psu: self.slot, + now, + status_input, + } + ); + + let status_cml = + retry_i2c_txn(now, self.slot, || self.dev.status_cml()) + .map(|data| data.0); + ringbuf_entry!( + __TRACE, + Trace::StatusCml { + psu: self.slot, + now, + status_cml + } + ); + + let status_temperature = retry_i2c_txn(now, self.slot, || self.dev.status_temperature()) - .map(|data| data.0) - .ok(); - let mfr = + .map(|data| data.0); + ringbuf_entry!( + __TRACE, + Trace::StatusTemperature { + psu: self.slot, + now, + status_temperature + } + ); + + let status_mfr_specific = retry_i2c_txn(now, self.slot, || self.dev.status_mfr_specific()) - .map(|data| data.0) - .ok(); + .map(|data| data.0); + ringbuf_entry!( + __TRACE, + Trace::StatusMfrSpecific { + psu: self.slot, + now, + status_mfr_specific + } + ); + ereport::PmbusStatus { - word, - iout, - vout, - input, - cml, - temp, - mfr, + word: status_word.ok(), + iout: status_iout.ok(), + vout: status_vout.ok(), + input: status_input.ok(), + cml: status_cml.ok(), + temp: status_temperature.ok(), + mfr: status_mfr_specific.ok(), } } } @@ -836,19 +965,12 @@ fn retry_i2c_txn( match txn() { Ok(x) => return Ok(x), Err(err) => { - ringbuf_entry!((now, Trace::I2cError { psu, err })); + ringbuf_entry!(__TRACE, Trace::I2cError { now, psu, err }); if retries_remaining == 0 { - // ringbuf_entry!((now, Trace::I2cFault { psu, txn })); return Err(err); } - // ringbuf_entry!(Trace::I2cRetry { - // psu, - // txn, - // retries_remaining - // }); - retries_remaining -= 1; } } @@ -903,24 +1025,26 @@ mod ereport { Ok(_) => { let len = s.into_encoder().into_writer().position(); packrat.deliver_ereport(&ereport_buf[..len]); - ringbuf_entry!(( - now, + ringbuf_entry!( + __TRACE, Trace::EreportSentOff { + now, psu: self.psu_slot, class: self.class, len, } - )); + ); } Err(_) => { // XXX(eliza): ereport didn't fit in buffer...what do - ringbuf_entry!(( - now, + ringbuf_entry!( + __TRACE, Trace::EreportTooBig { + now, psu: self.psu_slot, class: self.class } - )); + ); } } } From 653b4f6879f5d3792f9b1727501fe4a6cd719ec3 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Thu, 4 Sep 2025 12:29:09 -0700 Subject: [PATCH 5/7] don't report "fault cleared" unless the fault clears --- drv/psc-seq-server/src/main.rs | 126 ++++++++++++++++++++++++++------- 1 file changed, 99 insertions(+), 27 deletions(-) diff --git a/drv/psc-seq-server/src/main.rs b/drv/psc-seq-server/src/main.rs index d8b7ebb6b4..569726f15e 100644 --- a/drv/psc-seq-server/src/main.rs +++ b/drv/psc-seq-server/src/main.rs @@ -164,6 +164,18 @@ ringbuf!((u64, Event), 128, (0, Event::Empty)); #[derive(Copy, Clone, PartialEq, Eq)] enum Trace { None, + Faulted { + now: u64, + psu: u8, + }, + FaultCleared { + now: u64, + psu: u8, + }, + StillInFault { + now: u64, + psu: u8, + }, StatusWord { now: u64, psu: u8, @@ -331,7 +343,15 @@ enum PresentState { /// /// We will exit this state if the OK line is pulled low, or if we detect a /// fault. - On, + On { + /// If `true`, the PSU was power-cycled by the PSC in attempt to clear a + /// fault. If it reasserts POWER_GOOD, that indicates that the fault has + /// cleared; otherwise, the fault is persistent. + /// + /// If `false`, the PSU was either newly inserted, or a previous fault + /// has cleared. A new fault should produce a new fault ereport. + was_faulted: bool, + }, /// The PSU has just appeared and we're waiting a bit to confirm that it's /// stable before turning it on. (Waiting in this state provides some @@ -491,7 +511,7 @@ fn main() -> ! { serial: fruid.serial } )); - PresentState::On + PresentState::On { was_faulted: false } } else { // PSU was forced off by our previous incarnation. Schedule it to // turn back on in the future if things clear up. @@ -703,7 +723,9 @@ impl Psu { // The PSU is still present (since the Present::No case above // didn't fire) and our deadline has elapsed. Let's treat this // as valid! - self.state = PsuState::Present(PresentState::On); + self.state = PsuState::Present(PresentState::On { + was_faulted: false, + }); let ereport = ereport::Ereport { class: ereport::Class::Inserted, version: 0, @@ -724,14 +746,53 @@ impl Psu { } // yay! - (PsuState::Present(PresentState::On), _, Status::Good) => { + ( + PsuState::Present(PresentState::On { was_faulted }), + _, + Status::Good, + ) => { // Just in case we were previously unable to read any FRUID // values due to I2C weather, try to refresh them self.refresh_fruid(now); - Step::default() + // If we just turned this PSU back on after a fault, reasserting + // POWER_GOOD means that the fault has cleared. + let ereport = if was_faulted { + // Clear our tracking of the fault. If we fault again, treat + // that as a new fault. + self.state = PsuState::Present(PresentState::On { + was_faulted: false, + }); + ringbuf_entry!( + __TRACE, + Trace::FaultCleared { + now, + psu: self.slot, + } + ); + // Report that the fault has gone away. + Some(ereport::Ereport { + class: ereport::Class::FaultCleared, + version: 0, + dev_id: self.dev.i2c_device().component_id(), + psu_slot: self.slot, + fruid: self.fruid, + pmbus_status: Some(self.read_pmbus_status(now)), + }) + } else { + // If we did not just restart after a fault, do nothing. + None + }; + Step { + action: None, + ereport, + } } - (PsuState::Present(PresentState::On), _, Status::NotGood) => { + ( + PsuState::Present(PresentState::On { was_faulted }), + _, + Status::NotGood, + ) => { // The PSU appears to have pulled the OK signal into the "not // OK" state to indicate an internal fault! @@ -739,20 +800,40 @@ impl Psu { self.state = PsuState::Present(PresentState::Faulted { turn_on_deadline, }); - let ereport = ereport::Ereport { - class: ereport::Class::Fault, - version: 0, - dev_id: self.dev.i2c_device().component_id(), - psu_slot: self.slot, - fruid: self.fruid, - pmbus_status: Some(self.read_pmbus_status(now)), + // Did we just restart after a fault? If not, this is a new + // fault, which should be reported. + let ereport = if !was_faulted { + ringbuf_entry!( + __TRACE, + Trace::Faulted { + now, + psu: self.slot, + } + ); + Some(ereport::Ereport { + class: ereport::Class::Fault, + version: 0, + dev_id: self.dev.i2c_device().component_id(), + psu_slot: self.slot, + fruid: self.fruid, + pmbus_status: Some(self.read_pmbus_status(now)), + }) + } else { + ringbuf_entry!( + __TRACE, + Trace::StillInFault { + now, + psu: self.slot, + } + ); + None }; Step { action: Some(ActionRequired::DisableMe { attempt_snapshot: true, }), - ereport: Some(ereport), + ereport, } } @@ -788,19 +869,10 @@ impl Psu { if deadline <= now { // Take PSU out of probation state and start monitoring its // OK line. - self.state = PsuState::Present(PresentState::On); - let ereport = ereport::Ereport { - class: ereport::Class::FaultCleared, - version: 0, - dev_id: self.dev.i2c_device().component_id(), - psu_slot: self.slot, - fruid: self.fruid, - pmbus_status: Some(self.read_pmbus_status(now)), - }; - Step { - ereport: Some(ereport), - action: None, - } + self.state = PsuState::Present(PresentState::On { + was_faulted: true, + }); + Step::default() } else { // Remain in this state. Step::default() From 005943e89a9eb99e960c8369853e088209d6b5c1 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Thu, 4 Sep 2025 13:00:17 -0700 Subject: [PATCH 6/7] let's add ringbuf counters! --- Cargo.lock | 1 + drv/psc-seq-server/Cargo.toml | 3 +- drv/psc-seq-server/src/main.rs | 179 ++++++++++++++++++++++----------- 3 files changed, 123 insertions(+), 60 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a3cdcea9fd..69029d1d6e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1990,6 +1990,7 @@ version = "0.1.0" dependencies = [ "build-i2c", "build-util", + "counters", "drv-i2c-api", "drv-i2c-devices", "drv-packrat-vpd-loader", diff --git a/drv/psc-seq-server/Cargo.toml b/drv/psc-seq-server/Cargo.toml index e727f3ffe8..306d3321d4 100644 --- a/drv/psc-seq-server/Cargo.toml +++ b/drv/psc-seq-server/Cargo.toml @@ -12,7 +12,8 @@ drv-i2c-devices.path = "../../drv/i2c-devices" task-jefe-api.path = "../../task/jefe-api" task-packrat-api.path = "../../task/packrat-api" userlib = { path = "../../sys/userlib", features = ["panic-messages"] } -ringbuf = { path = "../../lib/ringbuf" } +ringbuf = { path = "../../lib/ringbuf", features = ["counters"] } +counters = { path = "../../lib/counters" } serde.workspace = true minicbor.workspace = true minicbor-serde.workspace = true diff --git a/drv/psc-seq-server/src/main.rs b/drv/psc-seq-server/src/main.rs index 569726f15e..4b66a2a488 100644 --- a/drv/psc-seq-server/src/main.rs +++ b/drv/psc-seq-server/src/main.rs @@ -108,52 +108,65 @@ use sys_api::{Edge, IrqControl, OutputType, PinSet, Pull, Speed}; use task_jefe_api::Jefe; use userlib::*; -use ringbuf::{ringbuf, ringbuf_entry}; +use ringbuf::{counted_ringbuf, ringbuf_entry}; task_slot!(SYS, sys); task_slot!(I2C, i2c_driver); task_slot!(JEFE, jefe); task_slot!(PACKRAT, packrat); -#[derive(Copy, Clone, PartialEq, Eq)] +#[derive(Copy, Clone, PartialEq, Eq, counters::Count)] enum Event { - Empty, + #[count(skip)] + None, /// Emitted at task startup when we find that a power supply is probably /// already on. (Note that if the power supply is not present, we will still /// detect it as "on" due to the pull resistors.) FoundEnabled { - psu: u8, + now: u64, + #[count(children)] + psu: Slot, serial: Option<[u8; 12]>, }, /// Emitted at task startup when we find that a power supply appears to have /// been disabled. FoundAlreadyDisabled { - psu: u8, + now: u64, + #[count(children)] + psu: Slot, serial: Option<[u8; 12]>, }, /// Emitted when a previously not present PSU's presence pin is asserted. Inserted { - psu: u8, + now: u64, + #[count(children)] + psu: Slot, serial: Option<[u8; 12]>, }, /// Emitted when a previously present PSU's presence pin is deasserted. Removed { - psu: u8, + now: u64, + #[count(children)] + psu: Slot, }, /// Emitted when we decide a power supply should be on. Enabling { - psu: u8, + now: u64, + #[count(children)] + psu: Slot, }, /// Emitted when we decide a power supply should be off; the `present` flag /// means the PSU is being turned off despite being present (`true`) or is /// being disabled because it's been removed (`false`). Disabling { - psu: u8, + now: u64, + #[count(children)] + psu: Slot, present: bool, }, } -ringbuf!((u64, Event), 128, (0, Event::Empty)); +counted_ringbuf!(Event, 128, Event::None); /// More verbose debugging data goes in its own ring buffer, so that we can /// maintain a longer history of major PSU events while still recording more @@ -161,75 +174,104 @@ ringbuf!((u64, Event), 128, (0, Event::Empty)); /// /// Each of these entries has a `now` value which can be correlated with the /// timestamps in the main ringbuf. -#[derive(Copy, Clone, PartialEq, Eq)] +#[derive(Copy, Clone, PartialEq, Eq, counters::Count)] enum Trace { + #[count(skip)] None, Faulted { now: u64, - psu: u8, + #[count(children)] + psu: Slot, }, FaultCleared { now: u64, - psu: u8, + #[count(children)] + psu: Slot, }, StillInFault { now: u64, - psu: u8, + #[count(children)] + psu: Slot, }, + #[count(skip)] StatusWord { now: u64, - psu: u8, + psu: Slot, status_word: Result, }, + #[count(skip)] StatusIout { now: u64, - psu: u8, + psu: Slot, status_iout: Result, }, + #[count(skip)] StatusVout { now: u64, - psu: u8, + psu: Slot, status_vout: Result, }, + #[count(skip)] StatusInput { now: u64, - psu: u8, + psu: Slot, status_input: Result, }, + #[count(skip)] StatusCml { now: u64, - psu: u8, + psu: Slot, status_cml: Result, }, + #[count(skip)] StatusTemperature { now: u64, - psu: u8, + psu: Slot, status_temperature: Result, }, + #[count(skip)] StatusMfrSpecific { now: u64, - psu: u8, + psu: Slot, status_mfr_specific: Result, }, I2cError { now: u64, - psu: u8, + #[count(children)] + psu: Slot, err: mwocp68::Error, }, EreportSentOff { now: u64, - psu: u8, + #[count(children)] + psu: Slot, class: ereport::Class, len: usize, }, EreportTooBig { now: u64, - psu: u8, + #[count(children)] + psu: Slot, class: ereport::Class, }, } -ringbuf!(__TRACE, Trace, 32, Trace::None); +counted_ringbuf!(__TRACE, Trace, 32, Trace::None); + +/// PSU numbers represented as an enum. This is intended for use with +/// `counted_ringbuf!`, instead of representing PSU numbers as raw u8s, which +/// cannot derive `counters::Count` (and would have to generate a counter table +/// with 256 entries rather than just 6). +#[derive(Copy, Clone, Eq, PartialEq, counters::Count)] +#[repr(u8)] +enum Slot { + Psu0 = 0, + Psu1 = 1, + Psu2 = 2, + Psu3 = 3, + Psu4 = 4, + Psu5 = 5, +} const STATUS_LED: sys_api::PinSet = sys_api::Port::A.pin(3); @@ -286,6 +328,15 @@ const PSU_PMBUS_DEVS: [fn(TaskId) -> (I2cDevice, u8); PSU_COUNT] = [ i2c_config::pmbus::v54_psu5 as fn(TaskId) -> (I2cDevice, u8), ]; +const PSU_SLOTS: [Slot; PSU_COUNT] = [ + Slot::Psu0, + Slot::Psu1, + Slot::Psu2, + Slot::Psu3, + Slot::Psu4, + Slot::Psu5, +]; + /// How long to wait after task startup before we start trying to inspect /// things. const STARTUP_SETTLE_MS: u64 = 500; // Current value is somewhat arbitrary. @@ -498,30 +549,27 @@ fn main() -> ! { let (dev, rail) = make_dev(i2c); Mwocp68::new(&dev, rail) }; + let slot = PSU_SLOTS[i]; let mut fruid = PsuFruid::default(); let state = if present_l_bits & (1 << PSU_PRESENT_L_PINS[i]) == 0 { // Hello, who are you? - fruid.refresh(&dev, i as u8, start_time); + fruid.refresh(&dev, slot, start_time); // ...and how are you doing? PsuState::Present(if initial_psu_enabled[i] { - ringbuf_entry!(( - start_time, - Event::FoundEnabled { - psu: i as u8, - serial: fruid.serial - } - )); + ringbuf_entry!(Event::FoundEnabled { + now: start_time, + psu: slot, + serial: fruid.serial + }); PresentState::On { was_faulted: false } } else { // PSU was forced off by our previous incarnation. Schedule it to // turn back on in the future if things clear up. - ringbuf_entry!(( - start_time, - Event::FoundAlreadyDisabled { - psu: i as u8, - serial: fruid.serial - } - )); + ringbuf_entry!(Event::FoundAlreadyDisabled { + now: start_time, + psu: slot, + serial: fruid.serial + }); PresentState::Faulted { turn_on_deadline: start_time.saturating_add(FAULT_OFF_MS), } @@ -530,7 +578,7 @@ fn main() -> ! { PsuState::NotPresent }; Psu { - slot: i as u8, + slot, state, dev, fruid, @@ -572,7 +620,10 @@ fn main() -> ! { None => (), Some(ActionRequired::EnableMe) => { - ringbuf_entry!((now, Event::Enabling { psu: i as u8 })); + ringbuf_entry!(Event::Enabling { + now, + psu: PSU_SLOTS[i] + }); // Enable the PSU by allowing `ENABLE_L` to float low, by no // longer asserting high. sys.gpio_configure_input( @@ -584,13 +635,11 @@ fn main() -> ! { if attempt_snapshot { // TODO snapshot goes here } - ringbuf_entry!(( + ringbuf_entry!(Event::Disabling { now, - Event::Disabling { - psu: i as u8, - present: attempt_snapshot, - } - )); + psu: PSU_SLOTS[i], + present: attempt_snapshot, + }); // Pull `ENABLE_L` high to disable the PSU. sys.gpio_configure_output( @@ -636,7 +685,7 @@ enum Status { } struct Psu { - slot: u8, + slot: Slot, state: PsuState, dev: Mwocp68, /// Because we would like to include the PSU's FRU ID information in the @@ -666,7 +715,10 @@ impl Psu { // decision is that the "NewlyInserted" settle time starts after the // contacts are _done_ scraping, not when they start. (_, Present::No, _) => { - ringbuf_entry!((now, Event::Removed { psu: self.slot })); + ringbuf_entry!(Event::Removed { + now, + psu: self.slot + }); let ereport = ereport::Ereport { class: ereport::Class::Removed, version: 0, @@ -699,13 +751,11 @@ impl Psu { // Hello, who are you? self.fruid = PsuFruid::default(); self.refresh_fruid(now); - ringbuf_entry!(( + ringbuf_entry!(Event::Inserted { now, - Event::Inserted { - psu: self.slot, - serial: self.fruid.serial - } - )); + psu: self.slot, + serial: self.fruid.serial + }); // No external action required until our timer elapses. Step::default() } @@ -1000,7 +1050,7 @@ struct PsuFruid { } impl PsuFruid { - fn refresh(&mut self, dev: &Mwocp68, psu: u8, now: u64) { + fn refresh(&mut self, dev: &Mwocp68, psu: Slot, now: u64) { if self.mfr.is_none() { self.mfr = retry_i2c_txn(now, psu, || dev.mfr_id()).ok().map(|v| v.0); @@ -1028,7 +1078,7 @@ impl PsuFruid { fn retry_i2c_txn( now: u64, - psu: u8, + psu: Slot, mut txn: impl FnMut() -> Result, ) -> Result { // Chosen by fair dice roll, seems reasonable-ish? @@ -1076,7 +1126,8 @@ mod ereport { #[serde(rename = "v")] pub(super) version: u32, pub(super) dev_id: &'static str, - pub(super) psu_slot: u8, + #[serde(serialize_with = "serialize_psu_slot")] + pub(super) psu_slot: Slot, pub(super) fruid: PsuFruid, #[serde(skip_serializing_if = "Option::is_none")] pub(super) pmbus_status: Option, @@ -1146,4 +1197,14 @@ mod ereport { .and_then(|s| str::from_utf8(&s[..]).ok()) .serialize(serializer) } + + fn serialize_psu_slot( + slot: &Slot, + serializer: S, + ) -> Result + where + S: serde::Serializer, + { + (*slot as u8).serialize(serializer) + } } From f1ff5868341f77b36b973dec36223f0a0a56c34a Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Fri, 5 Sep 2025 14:57:51 -0700 Subject: [PATCH 7/7] hahahahahaha this is all profoundly bad --- Cargo.lock | 37 ++-- Cargo.toml | 2 +- build/i2c/Cargo.toml | 2 +- build/i2c/src/lib.rs | 230 ++++++++++++++-------- task/control-plane-agent/src/inventory.rs | 39 ++-- task/control-plane-agent/src/mgs_psc.rs | 2 +- task/validate-api/build.rs | 24 +++ task/validate-api/src/lib.rs | 6 + 8 files changed, 224 insertions(+), 118 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 69029d1d6e..30d2778add 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6,7 +6,7 @@ version = 4 name = "abi" version = "0.1.0" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "byteorder", "phash", "serde", @@ -271,9 +271,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.1" +version = "2.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" [[package]] name = "bitvec" @@ -337,7 +337,7 @@ dependencies = [ name = "build-kconfig" version = "0.1.0" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "serde", ] @@ -1624,7 +1624,7 @@ version = "0.1.0" dependencies = [ "anyhow", "attest-api", - "bitflags 2.9.1", + "bitflags 2.9.4", "build-lpc55pins", "build-util", "call_rustfmt", @@ -1724,7 +1724,7 @@ dependencies = [ name = "drv-mb85rsxx-fram" version = "0.1.0" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "counters", "drv-spi-api", ] @@ -2597,7 +2597,7 @@ dependencies = [ name = "drv-stm32xx-sys" version = "0.1.0" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "build-stm32xx-sys", "build-util", "cfg-if", @@ -3012,7 +3012,7 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "gateway-ereport-messages" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/management-gateway-service#57ffd1c24f3ad1919fb4a605d5a501f2c5deb54c" +source = "git+https://github.com/oxidecomputer/management-gateway-service#831d4be4d3a851853ee2c62451f63f2fcfb4a30b" dependencies = [ "zerocopy 0.8.26", ] @@ -3020,9 +3020,9 @@ dependencies = [ [[package]] name = "gateway-messages" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/management-gateway-service#83fe1885c9916cebbee261c47a09d832d75b7df0" +source = "git+https://github.com/oxidecomputer/management-gateway-service?branch=eliza%2Fvpd#dc297c9a94674d13db3f2faf7536af70a8671e8c" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "hubpack", "serde", "serde-big-array 0.5.1", @@ -3254,7 +3254,7 @@ dependencies = [ name = "host-sp-messages" version = "0.1.0" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "counters", "drv-i2c-types", "fletcher", @@ -3500,7 +3500,7 @@ dependencies = [ "abi", "anyhow", "armv8-m-mpu", - "bitflags 2.9.1", + "bitflags 2.9.4", "build-kconfig", "build-util", "byteorder", @@ -5188,20 +5188,19 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "strum" -version = "0.27.1" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" [[package]] name = "strum_macros" -version = "0.27.1" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c77a8c5abcaf0f9ce05d62342b7d298c346515365c36b673df4ebe3ced01fde8" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "rustversion", "syn 2.0.98", ] @@ -5980,7 +5979,7 @@ name = "task-thermal" version = "0.1.0" dependencies = [ "anyhow", - "bitflags 2.9.1", + "bitflags 2.9.4", "build-i2c", "build-util", "cortex-m", @@ -6503,7 +6502,7 @@ name = "transceiver-messages" version = "0.1.1" source = "git+https://github.com/oxidecomputer/transceiver-control/#c5564eb96ddc7887a02596cc039662c87e906d0c" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "hubpack", "serde", ] diff --git a/Cargo.toml b/Cargo.toml index bdf1031473..084c24d282 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -145,7 +145,7 @@ zip = { version = "0.6", default-features = false, features = ["bzip2", "deflate # Oxide forks and repos attest-data = { git = "https://github.com/oxidecomputer/dice-util", default-features = false, version = "0.4.0" } dice-mfg-msgs = { git = "https://github.com/oxidecomputer/dice-util", default-features = false, version = "0.2.1" } -gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", default-features = false, features = ["smoltcp"] } +gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", default-features = false, features = ["smoltcp"], branch = "eliza/vpd" } gateway-ereport-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", default-features = false } gimlet-inspector-protocol = { git = "https://github.com/oxidecomputer/gimlet-inspector-protocol", version = "0.1.0" } hif = { git = "https://github.com/oxidecomputer/hif", default-features = false } diff --git a/build/i2c/Cargo.toml b/build/i2c/Cargo.toml index caa24d449c..3b0e2101d4 100644 --- a/build/i2c/Cargo.toml +++ b/build/i2c/Cargo.toml @@ -11,7 +11,7 @@ cfg-if = { workspace = true } convert_case = { workspace = true } indexmap = { workspace = true } multimap = { workspace = true } -serde = { workspace = true } +serde = { workspace = true, features = ["rc"] } rangemap.workspace = true [features] diff --git a/build/i2c/src/lib.rs b/build/i2c/src/lib.rs index d2618ce240..836943f199 100644 --- a/build/i2c/src/lib.rs +++ b/build/i2c/src/lib.rs @@ -11,6 +11,7 @@ use serde::Deserialize; use std::collections::{BTreeMap, HashMap, HashSet}; use std::fmt::Write; use std::fs::File; +use std::rc::Rc; // // Our definition of the `Config` type. We share this type with all other @@ -26,7 +27,7 @@ struct Config { #[serde(rename_all = "kebab-case", deny_unknown_fields)] struct I2cConfig { controllers: Vec, - devices: Option>, + devices: Option>>, } // @@ -67,19 +68,19 @@ struct I2cController { #[allow(dead_code)] struct I2cDevice { /// device part name - device: String, + device: Rc, /// device name - name: Option, + name: Option>, /// I2C controller, if bus not named controller: Option, /// I2C bus name, if controller not specified - bus: Option, + bus: Option>, /// I2C port, if required - port: Option, + port: Option>, /// Disambiguation between sensor configurations flavor: Option, @@ -97,7 +98,7 @@ struct I2cDevice { description: String, /// reference designator, if any - refdes: Option, + refdes: Option>, /// power information, if any power: Option, @@ -105,11 +106,19 @@ struct I2cDevice { /// sensor information, if any sensors: Option, + // /// generated VPD implementation, if any + // vpd: Option, /// device is removable #[serde(default)] removable: bool, } +#[derive(Copy, Clone, Debug, Deserialize, PartialOrd, Ord, Eq, PartialEq)] +pub enum VpdDescription { + At24csw080(usize), + Pmbus, +} + impl I2cDevice { /// Checks whether the given sensor kind is associated with an `I2cPower` /// struct stored in this device, returning it if that's the case. @@ -171,7 +180,7 @@ struct I2cMux { #[allow(dead_code)] #[serde(rename_all = "kebab-case", deny_unknown_fields)] struct I2cPower { - rails: Option>, + rails: Option>>, /// Optional phases, which must be the same length as `rails` if present phases: Option>>, @@ -219,7 +228,7 @@ struct I2cSensors { #[serde(default)] speed: usize, - names: Option>, + names: Option>>, } #[derive(Clone, Debug, Deserialize, Hash, PartialOrd, PartialEq, Eq, Ord)] @@ -259,27 +268,27 @@ impl I2cSensors { #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd)] struct DeviceKey { - device: String, + device: Rc, kind: Sensor, } #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd)] struct DeviceNameKey { - device: String, - name: String, + device: Rc, + name: Rc, kind: Sensor, } #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd)] struct DeviceRefdesKey { - device: String, - refdes: Refdes, + device: Rc, + refdes: Rc, kind: Sensor, } #[derive(Debug, Clone, PartialEq, Eq)] pub struct DeviceSensor { - pub name: Option, + pub name: Option>, pub kind: Sensor, pub id: usize, } @@ -304,7 +313,7 @@ struct I2cSensorsDescription { } impl I2cSensorsDescription { - fn new(devices: &[I2cDevice]) -> Self { + fn new(devices: &[Rc]) -> Self { let mut desc = Self { by_device: MultiMap::with_capacity(devices.len()), by_name: MultiMap::new(), @@ -366,7 +375,7 @@ impl I2cSensorsDescription { let id = self.total_sensors; self.total_sensors += 1; - let name: Option = if let Some(power) = d.power_for_kind(kind) { + let name = if let Some(power) = d.power_for_kind(kind) { if let Some(rails) = &power.rails { if idx < rails.len() { Some(rails[idx].clone()) @@ -492,7 +501,7 @@ struct ConfigGenerator { controllers: Vec, /// all devices - devices: Vec, + devices: Vec>, /// hash bus name to controller/port index pair buses: HashMap, @@ -576,7 +585,7 @@ impl ConfigGenerator { d.device, d.address ); } - (_, Some(bus)) if !buses.contains_key(bus) => { + (_, Some(bus)) if !buses.contains_key(bus.as_ref()) => { panic!( "device {} at address {:#x} specifies \ unknown bus \"{bus}\"", @@ -871,7 +880,7 @@ impl ConfigGenerator { fn lookup_controller_port(&self, d: &I2cDevice) -> (u8, usize) { let controller = match &d.bus { - Some(bus) => self.buses.get(bus).unwrap().0, + Some(bus) => self.buses.get(bus.as_ref()).unwrap().0, None => d.controller.unwrap(), }; @@ -880,7 +889,7 @@ impl ConfigGenerator { panic!("device {} has both port and bus", d.device); } - (Some(bus), None) => match self.buses.get(bus) { + (Some(bus), None) => match self.buses.get(bus.as_ref()) { Some((_, port)) => port, None => { panic!("device {} has invalid bus", d.device); @@ -983,50 +992,7 @@ impl ConfigGenerator { } pub fn generate_devices(&mut self) -> Result<()> { - // - // Throw all devices into a MultiMap based on device. - // - let mut by_device = MultiMap::new(); - let mut by_name = HashMap::new(); - let mut by_refdes = HashMap::new(); - let mut by_bus = MultiMap::new(); - - let mut by_port = MultiMap::new(); - let mut by_controller = MultiMap::new(); - - for (index, d) in self.devices.iter().enumerate() { - by_device.insert(&d.device, d); - - let (controller, port) = self.lookup_controller_port(d); - - by_port.insert(port, index); - by_controller.insert(controller, index); - - if let Some(bus) = &d.bus { - by_bus.insert((&d.device, bus), d); - } - - if let Some(name) = &d.name { - if by_name.insert((&d.device, name), d).is_some() { - panic!("duplicate name {} for device {}", name, d.device) - } - } - if let Some(refdes) = &d.refdes { - if by_refdes.insert((&d.device, refdes), d).is_some() { - panic!( - "duplicate refdes {refdes:?} for device {}", - d.device - ) - } else if by_name - .contains_key(&(&d.device, &refdes.to_upper_ident())) - { - panic!( - "refdes {refdes:?} for device {} is also a device name", - d.device - ) - } - } - } + let devices = self.device_table(); write!( &mut self.output, @@ -1048,7 +1014,7 @@ impl ConfigGenerator { match index {{"## )?; - let mut all: Vec<_> = by_controller.iter_all().collect(); + let mut all: Vec<_> = devices.by_controller.iter_all().collect(); all.sort(); match_arms(&mut self.output, all, |c| { @@ -1073,7 +1039,7 @@ impl ConfigGenerator { match index {{"## )?; - let mut all: Vec<_> = by_port.iter_all().collect(); + let mut all: Vec<_> = devices.by_port.iter_all().collect(); all.sort(); match_arms(&mut self.output, all, |p| format!("Some(PortIndex({p}))"))?; @@ -1087,7 +1053,7 @@ impl ConfigGenerator { "## )?; - let mut all: Vec<_> = by_device.iter_all().collect(); + let mut all: Vec<_> = devices.by_device.iter_all().collect(); all.sort(); for (device, devices) in all { @@ -1114,7 +1080,7 @@ impl ConfigGenerator { )?; } - let mut all: Vec<_> = by_bus.iter_all().collect(); + let mut all: Vec<_> = devices.by_bus.iter_all().collect(); all.sort(); for ((device, bus), devices) in all { @@ -1141,7 +1107,7 @@ impl ConfigGenerator { )?; } - let mut all: Vec<_> = by_name.iter().collect(); + let mut all: Vec<_> = devices.by_name.iter().collect(); all.sort(); for ((device, name), d) in &all { write!( @@ -1163,7 +1129,7 @@ impl ConfigGenerator { )?; } - let mut all: Vec<_> = by_refdes.iter().collect(); + let mut all: Vec<_> = devices.by_refdes.iter().collect(); all.sort(); let mut max_component_id_len = 0; @@ -1275,7 +1241,7 @@ impl ConfigGenerator { // returned by `device_descriptions()` below: if we change the ordering // here, it must be updated there as well. for (index, device) in self.devices.iter().enumerate() { - if drivers.contains(&device.device) { + if drivers.contains(device.device.as_ref()) { let driver = device.device.to_case(Case::UpperCamel); let out = self.generate_device(device, 24); @@ -1590,14 +1556,14 @@ impl ConfigGenerator { s.total_sensors )?; - let mut emitted_structs: HashMap> = + let mut emitted_structs: HashMap, Option> = HashMap::new(); for (i, d) in self.devices.clone().iter().enumerate() { let mut struct_name = d.device.clone(); if let Some(suffix) = &d.flavor { - struct_name = format!("{struct_name}_{suffix}"); + struct_name = Rc::new(format!("{struct_name}_{suffix}")); } - if let Some(prev) = emitted_structs.get(&struct_name) { + if let Some(prev) = emitted_structs.get(struct_name.as_ref()) { match (prev, &d.sensors) { (Some(a), Some(b)) => { if !a.is_compatible_with(b) { @@ -1692,6 +1658,69 @@ impl ConfigGenerator { writeln!(&mut self.output, " }}")?; Ok(()) } + + fn device_table(&self) -> DeviceTable { + // + // Throw all devices into a MultiMap based on device. + // + let mut by_device = MultiMap::new(); + let mut by_name = HashMap::new(); + let mut by_refdes = HashMap::new(); + let mut by_bus = MultiMap::new(); + + let mut by_port = MultiMap::new(); + let mut by_controller = MultiMap::new(); + + for (index, d) in self.devices.iter().enumerate() { + by_device.insert(d.device.clone(), d.clone()); + + let (controller, port) = self.lookup_controller_port(d); + + by_port.insert(port, index); + by_controller.insert(controller, index); + + if let Some(ref bus) = d.bus { + by_bus.insert((d.device.clone(), bus.clone()), d.clone()); + } + + if let Some(ref name) = d.name { + if by_name + .insert((d.device.clone(), name.clone()), d.clone()) + .is_some() + { + panic!("duplicate name {} for device {}", name, d.device) + } + } + if let Some(ref refdes) = d.refdes { + if by_refdes + .insert((d.device.clone(), refdes.clone()), d.clone()) + .is_some() + { + panic!( + "duplicate refdes {refdes:?} for device {}", + d.device + ) + } else if by_name.contains_key(&( + d.device.clone(), + Rc::new(refdes.to_upper_ident()), + )) { + panic!( + "refdes {refdes:?} for device {} is also a device name", + d.device + ) + } + } + } + + DeviceTable { + by_name, + by_refdes, + by_device, + by_port, + by_controller, + by_bus, + } + } } #[derive(Copy, Clone)] @@ -1771,12 +1800,22 @@ pub fn codegen(settings: impl Into) -> Result<()> { Ok(()) } +struct DeviceTable { + by_device: MultiMap, Rc>, + by_name: HashMap<(Rc, Rc), Rc>, + by_refdes: HashMap<(Rc, Rc), Rc>, + by_bus: MultiMap<(Rc, Rc), Rc>, + by_port: MultiMap, + by_controller: MultiMap, +} + pub struct I2cDeviceDescription { - pub device: String, + pub device: Rc, pub description: String, pub sensors: Vec, pub device_id: Option, - pub name: Option, + pub name: Option>, + pub vpd: Option, } /// @@ -1787,6 +1826,20 @@ pub struct I2cDeviceDescription { /// pub fn device_descriptions() -> impl Iterator { let g = ConfigGenerator::new(Disposition::Validation.into()); + let devicetable = g.device_table(); + let at24csw080_indices = if let Some(eeproms) = + devicetable.by_device.get_vec(&("at24csw080".to_string())) + { + eeproms + .iter() + .enumerate() + .filter_map(|(index, device)| { + Some((device.refdes.as_ref()?.clone(), index)) + }) + .collect::>() + } else { + HashMap::new() + }; let sensors = g.sensors_description(); assert_eq!(sensors.device_sensors.len(), g.devices.len()); @@ -1794,14 +1847,29 @@ pub fn device_descriptions() -> impl Iterator { // Matches the ordering of the `match` produced by `generate_validation()` // above; if we change the order here, it must change there as well. g.devices.into_iter().zip(sensors.device_sensors).map( - |(device, sensors)| { - let device_id = device.refdes.as_ref().map(Refdes::to_component_id); + move |(device, sensors)| { + let vpd = if (&*device.device) == "at24csw080" { + let Some(refdes) = device.refdes.as_deref() else { + panic!("device {device:?} has AT24CSW080 VPD kind, but no refdes") + }; + let Some(vpd_idx) = at24csw080_indices.get(refdes) else { + panic!("index for {refdes:?} not found in AT24CSW080 indices") + }; + Some(VpdDescription::At24csw080(*vpd_idx)) + } else if device.power.as_ref().map(|power| power.pmbus).unwrap_or(false) { + Some(VpdDescription::Pmbus) + } else { + None + }; + let device_id = + device.refdes.as_deref().map(Refdes::to_component_id); I2cDeviceDescription { - device: device.device, - description: device.description, + device: device.device.clone(), + description: device.description.clone(), sensors, device_id, - name: device.name, + name: device.name.clone(), + vpd, } }, ) diff --git a/task/control-plane-agent/src/inventory.rs b/task/control-plane-agent/src/inventory.rs index 0832192500..80e2cca93e 100644 --- a/task/control-plane-agent/src/inventory.rs +++ b/task/control-plane-agent/src/inventory.rs @@ -44,7 +44,9 @@ impl Inventory { match Index::try_from(component)? { Index::OurDevice(_) => Ok(0), Index::ValidateDevice(i) => { - Ok(VALIDATE_DEVICES[i].sensors.len() as u32) + let device = VALIDATE_DEVICES[i]; + + Ok(device.sensors.len() as u32 + device.vpd.is_some() as u32) } } } @@ -53,7 +55,7 @@ impl Inventory { &self, component: &SpComponent, component_index: BoundsChecked, - ) -> ComponentDetails { + ) -> ComponentDetails<&str> { // `component_index` is guaranteed to be in the range // `0..num_component_details(component)`, and we only return a value // greater than 0 from that method for indices in the VALIDATE_DEVICES @@ -64,20 +66,27 @@ impl Inventory { Ok(Index::ValidateDevice(i)) => i, Ok(Index::OurDevice(_)) | Err(_) => panic!(), }; + let device = &VALIDATE_DEVICES[val_device_index]; + if let Some(sensor_description) = + device.sensors.get(component_index.0 as usize) + { + let value = self + .sensor_task + .get(sensor_description.id) + .map_err(|err| SensorErrorConvert(err).into()); + + return ComponentDetails::Measurement(Measurement { + name: sensor_description.name.unwrap_or(""), + kind: MeasurementKindConvert(sensor_description.kind).into(), + value, + }); + } - let sensor_description = &VALIDATE_DEVICES[val_device_index].sensors - [component_index.0 as usize]; - - let value = self - .sensor_task - .get(sensor_description.id) - .map_err(|err| SensorErrorConvert(err).into()); - - ComponentDetails::Measurement(Measurement { - name: sensor_description.name.unwrap_or(""), - kind: MeasurementKindConvert(sensor_description.kind).into(), - value, - }) + match device.vpd { + Some(task_validate_api::VpdDescription::VpdTask(idx)) => todo!(), + Some(task_validate_api::VpdDescription::Pmbus) => todo!(), + None => panic!(), // details index out of range?? + } } pub(crate) fn device_description( diff --git a/task/control-plane-agent/src/mgs_psc.rs b/task/control-plane-agent/src/mgs_psc.rs index fdf45de984..2fa0123e5d 100644 --- a/task/control-plane-agent/src/mgs_psc.rs +++ b/task/control-plane-agent/src/mgs_psc.rs @@ -451,7 +451,7 @@ impl SpHandler for MgsHandler { &mut self, component: SpComponent, index: BoundsChecked, - ) -> ComponentDetails { + ) -> ComponentDetails<&'_ str> { self.common.inventory().component_details(&component, index) } diff --git a/task/validate-api/build.rs b/task/validate-api/build.rs index 869ef1c4dc..39c7244deb 100644 --- a/task/validate-api/build.rs +++ b/task/validate-api/build.rs @@ -15,6 +15,7 @@ fn main() -> Result<(), Box> { } fn write_pub_device_descriptions() -> anyhow::Result<()> { + use build_i2c::VpdDescription; use gateway_messages::SpComponent; let devices = build_i2c::device_descriptions().collect::>(); @@ -92,6 +93,29 @@ fn write_pub_device_descriptions() -> anyhow::Result<()> { writeln!(file, " }},")?; } writeln!(file, " ],")?; + match dev.vpd { + Some(VpdDescription::At24csw080(idx)) => { + if idx > u8::MAX as usize { + println!( + "cargo::error=device {:?} ({:?}) VPD task index > {},", + dev.device, + dev.description, + u8::MAX + ); + } + writeln!( + file, + " vpd: Some(VpdDescription::VpdTask({idx}))," + )?; + } + Some(VpdDescription::Pmbus) => { + println!("cargo::warning=eliza has to figure out how to make PMBus FRUID lookups work"); + writeln!(file, " vpd: None,")?; + } + None => { + writeln!(file, " vpd: None,")?; + } + }; writeln!(file, " }},")?; } diff --git a/task/validate-api/src/lib.rs b/task/validate-api/src/lib.rs index 3884406f07..deaf51314d 100644 --- a/task/validate-api/src/lib.rs +++ b/task/validate-api/src/lib.rs @@ -75,6 +75,12 @@ pub struct DeviceDescription { pub description: &'static str, pub sensors: &'static [SensorDescription], pub id: [u8; MAX_ID_LENGTH], + pub vpd: Option, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum VpdDescription { + VpdTask(u8), } include!(concat!(env!("OUT_DIR"), "/device_descriptions.rs"));