@@ -20,9 +20,11 @@ use futures::stream::FuturesUnordered;
2020use gateway_client:: Client as MgsClient ;
2121use gateway_client:: types:: SpIdentifier ;
2222use gateway_client:: types:: SpIgnition ;
23+ use gateway_types:: component:: SpType ;
2324use internal_dns_resolver:: Resolver ;
2425use internal_dns_types:: names:: ServiceName ;
2526use nexus_db_model:: Ereport ;
27+ use nexus_db_model:: Sled ;
2628use nexus_db_model:: SupportBundle ;
2729use nexus_db_model:: SupportBundleState ;
2830use nexus_db_queries:: authz;
@@ -47,9 +49,11 @@ use omicron_uuid_kinds::SledUuid;
4749use omicron_uuid_kinds:: SupportBundleUuid ;
4850use omicron_uuid_kinds:: ZpoolUuid ;
4951use parallel_task_set:: ParallelTaskSet ;
52+ use serde:: Serialize ;
5053use serde_json:: json;
5154use sha2:: { Digest , Sha256 } ;
5255use slog_error_chain:: InlineErrorChain ;
56+ use std:: collections:: BTreeMap ;
5357use std:: future:: Future ;
5458use std:: io:: Write ;
5559use std:: num:: NonZeroU64 ;
@@ -61,6 +65,7 @@ use tokio::io::AsyncWriteExt;
6165use tokio:: io:: SeekFrom ;
6266use tokio_util:: task:: AbortOnDropHandle ;
6367use tufaceous_artifact:: ArtifactHash ;
68+ use uuid:: Uuid ;
6469use zip:: ZipArchive ;
6570use zip:: ZipWriter ;
6671use zip:: write:: FullFileOptions ;
@@ -707,23 +712,44 @@ impl BundleCollection {
707712 None
708713 } ;
709714
710- let sp_dumps_dir = dir. path ( ) . join ( "sp_task_dumps" ) ;
711- tokio:: fs:: create_dir_all ( & sp_dumps_dir) . await . with_context ( || {
712- format ! ( "failed to create SP task dump directory {sp_dumps_dir}" )
713- } ) ?;
714- if let Err ( e) =
715- save_all_sp_dumps ( log, & self . resolver , & sp_dumps_dir) . await
716- {
717- error ! ( log, "failed to capture SP task dumps" ; "error" => InlineErrorChain :: new( e. as_ref( ) ) ) ;
718- } else {
719- report. listed_sps = true ;
720- } ;
721-
722- if let Ok ( all_sleds) = self
715+ let all_sleds = self
723716 . datastore
724717 . sled_list_all_batched ( & self . opctx , SledFilter :: InService )
718+ . await ;
719+
720+ if let Ok ( mgs_client) = self . create_mgs_client ( ) . await {
721+ if let Err ( e) = write_sled_info (
722+ & self . log ,
723+ & mgs_client,
724+ all_sleds. as_deref ( ) . ok ( ) ,
725+ dir. path ( ) ,
726+ )
725727 . await
726- {
728+ {
729+ error ! ( log, "Failed to write sled_info.json" ; "error" => InlineErrorChain :: new( e. as_ref( ) ) ) ;
730+ }
731+
732+ let sp_dumps_dir = dir. path ( ) . join ( "sp_task_dumps" ) ;
733+ tokio:: fs:: create_dir_all ( & sp_dumps_dir) . await . with_context (
734+ || {
735+ format ! (
736+ "Failed to create SP task dump directory {sp_dumps_dir}"
737+ )
738+ } ,
739+ ) ?;
740+
741+ if let Err ( e) =
742+ save_all_sp_dumps ( log, & mgs_client, & sp_dumps_dir) . await
743+ {
744+ error ! ( log, "Failed to capture SP task dumps" ; "error" => InlineErrorChain :: new( e. as_ref( ) ) ) ;
745+ } else {
746+ report. listed_sps = true ;
747+ } ;
748+ } else {
749+ warn ! ( log, "No MGS client, skipping SP task dump collection" ) ;
750+ }
751+
752+ if let Ok ( all_sleds) = all_sleds {
727753 report. listed_in_service_sleds = true ;
728754
729755 const MAX_CONCURRENT_SLED_REQUESTS : usize = 16 ;
@@ -1031,6 +1057,20 @@ impl BundleCollection {
10311057 ) ;
10321058 Ok ( ( ) )
10331059 }
1060+
1061+ async fn create_mgs_client ( & self ) -> anyhow:: Result < MgsClient > {
1062+ self
1063+ . resolver
1064+ . lookup_socket_v6 ( ServiceName :: ManagementGatewayService )
1065+ . await
1066+ . map ( |sockaddr| {
1067+ let url = format ! ( "http://{}" , sockaddr) ;
1068+ gateway_client:: Client :: new ( & url, self . log . clone ( ) )
1069+ } ) . map_err ( |e| {
1070+ error ! ( self . log, "failed to resolve MGS address" ; "error" => InlineErrorChain :: new( & e) ) ;
1071+ e. into ( )
1072+ } )
1073+ }
10341074}
10351075
10361076impl BackgroundTask for SupportBundleCollector {
@@ -1316,18 +1356,9 @@ where
13161356/// Collect task dumps from all SPs via MGS and save them to a directory.
13171357async fn save_all_sp_dumps (
13181358 log : & slog:: Logger ,
1319- resolver : & Resolver ,
1359+ mgs_client : & MgsClient ,
13201360 sp_dumps_dir : & Utf8Path ,
13211361) -> anyhow:: Result < ( ) > {
1322- let mgs_client = resolver
1323- . lookup_socket_v6 ( ServiceName :: ManagementGatewayService )
1324- . await
1325- . map ( |sockaddr| {
1326- let url = format ! ( "http://{}" , sockaddr) ;
1327- gateway_client:: Client :: new ( & url, log. clone ( ) )
1328- } )
1329- . context ( "failed to resolve address of MGS" ) ?;
1330-
13311362 let available_sps = get_available_sps ( & mgs_client) . await ?;
13321363
13331364 let mut tasks = ParallelTaskSet :: new ( ) ;
@@ -1412,6 +1443,82 @@ async fn save_sp_dumps(
14121443 Ok ( ( ) )
14131444}
14141445
1446+ /// Write a file with a JSON mapping of sled serial numbers to cubby and UUIDs for easier
1447+ /// identification of sleds present in a bundle.
1448+ async fn write_sled_info (
1449+ log : & slog:: Logger ,
1450+ mgs_client : & MgsClient ,
1451+ nexus_sleds : Option < & [ Sled ] > ,
1452+ dir : & Utf8Path ,
1453+ ) -> anyhow:: Result < ( ) > {
1454+ #[ derive( Serialize ) ]
1455+ struct SledInfo {
1456+ cubby : Option < u16 > ,
1457+ uuid : Option < Uuid > ,
1458+ }
1459+
1460+ let available_sps = get_available_sps ( & mgs_client)
1461+ . await
1462+ . context ( "failed to get available SPs" ) ?;
1463+
1464+ // We can still get a useful mapping of cubby to serial using just the data from MGS.
1465+ let mut nexus_map: BTreeMap < _ , _ > = nexus_sleds
1466+ . unwrap_or_default ( )
1467+ . into_iter ( )
1468+ . map ( |sled| ( sled. serial_number ( ) , sled) )
1469+ . collect ( ) ;
1470+
1471+ let mut sled_info = BTreeMap :: new ( ) ;
1472+ for sp in
1473+ available_sps. into_iter ( ) . filter ( |sp| matches ! ( sp. type_, SpType :: Sled ) )
1474+ {
1475+ let sp_state = match mgs_client. sp_get ( & sp. type_ , sp. slot ) . await {
1476+ Ok ( s) => s. into_inner ( ) ,
1477+ Err ( e) => {
1478+ error ! ( log,
1479+ "Failed to get SP state for sled_info.json" ;
1480+ "cubby" => sp. slot,
1481+ "component" => %sp. type_,
1482+ "error" => InlineErrorChain :: new( & e)
1483+ ) ;
1484+ continue ;
1485+ }
1486+ } ;
1487+
1488+ if let Some ( sled) = nexus_map. remove ( sp_state. serial_number . as_str ( ) ) {
1489+ sled_info. insert (
1490+ sp_state. serial_number . to_string ( ) ,
1491+ SledInfo {
1492+ cubby : Some ( sp. slot ) ,
1493+ uuid : Some ( * sled. identity . id . as_untyped_uuid ( ) ) ,
1494+ } ,
1495+ ) ;
1496+ } else {
1497+ sled_info. insert (
1498+ sp_state. serial_number . to_string ( ) ,
1499+ SledInfo { cubby : Some ( sp. slot ) , uuid : None } ,
1500+ ) ;
1501+ }
1502+ }
1503+
1504+ // Sleds not returned by MGS.
1505+ for ( serial, sled) in nexus_map {
1506+ sled_info. insert (
1507+ serial. to_string ( ) ,
1508+ SledInfo {
1509+ cubby : None ,
1510+ uuid : Some ( * sled. identity . id . as_untyped_uuid ( ) ) ,
1511+ } ,
1512+ ) ;
1513+ }
1514+
1515+ let json = serde_json:: to_string_pretty ( & sled_info)
1516+ . context ( "failed to serialize sled info to JSON" ) ?;
1517+ tokio:: fs:: write ( dir. join ( "sled_info.json" ) , json) . await ?;
1518+
1519+ Ok ( ( ) )
1520+ }
1521+
14151522fn is_fs_safe_single_path_component ( s : & str ) -> bool {
14161523 // Might be path traversal...
14171524 if s == "." || s == ".." {
0 commit comments