Skip to content

Commit

Permalink
Merge #1786
Browse files Browse the repository at this point in the history
1786: chore: increase ps_retries and silence log once logged r=Abhinandan-Purkait a=Abhinandan-Purkait

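- Increase the ps_retries to 300 (the option is widened from `u8` to `u16` so that values above 255 fit).
- Log the first failure to persist nexus information, then retry silently instead of logging on every attempt.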

Co-authored-by: Abhinandan Purkait <[email protected]>
  • Loading branch information
mayastor-bors and Abhinandan-Purkait committed Dec 18, 2024
2 parents d6fcd63 + b64d617 commit 88d1cc3
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 13 deletions.
21 changes: 14 additions & 7 deletions io-engine/src/bdev/nexus/nexus_persistence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,12 @@ impl<'n> Nexus<'n> {
};
nexus_info.children.push(child_info);
});
// We started with this child because it was healthy in etcd, or isn't there at all.
// Being unhealthy here means it is undergoing a fault/retire before nexus is open.
if nexus_info.children.len() == 1 && !nexus_info.children[0].healthy {
// We started with this child because it was healthy in etcd, or
// isn't there at all. Being unhealthy here means it is undergoing
// a fault/retire before the nexus is open.
if nexus_info.children.len() == 1
&& !nexus_info.children[0].healthy
{
warn!("{self:?} Not persisting: the only child went unhealthy during nexus creation");
return Err(Error::NexusCreate {
name: self.name.clone(),
Expand Down Expand Up @@ -224,6 +227,7 @@ impl<'n> Nexus<'n> {
};

let mut retry = PersistentStore::retries();
let mut logged = false;
loop {
let Err(err) = PersistentStore::put(&key, &info.inner).await else {
trace!(?key, "{self:?}: the state was saved successfully");
Expand All @@ -238,10 +242,13 @@ impl<'n> Nexus<'n> {
});
}

error!(
"{self:?}: failed to persist nexus information, \
will retry ({retry} left): {err}"
);
if !logged {
error!(
"{self:?}: failed to persist nexus information, \
will silently retry ({retry} left): {err}"
);
logged = true;
}

// Allow some time for the connection to the persistent
// store to be re-established before retrying the operation.
Expand Down
4 changes: 2 additions & 2 deletions io-engine/src/core/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ pub struct MayastorCliArgs {
pub ps_timeout: Duration,
#[clap(long = "ps-retries", default_value = "30")]
/// Persistent store operation retries.
pub ps_retries: u8,
pub ps_retries: u16,
#[clap(long = "bdev-pool-size", default_value = "65535")]
/// Number of entries in memory pool for bdev I/O contexts
pub bdev_io_ctx_pool_size: u64,
Expand Down Expand Up @@ -387,7 +387,7 @@ pub struct MayastorEnvironment {
pub registration_endpoint: Option<Uri>,
ps_endpoint: Option<String>,
ps_timeout: Duration,
ps_retries: u8,
ps_retries: u16,
mayastor_config: Option<String>,
ptpl_dir: Option<String>,
pool_config: Option<String>,
Expand Down
8 changes: 4 additions & 4 deletions io-engine/src/persistent_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ pub struct PersistentStoreBuilder {
/// Operation timeout.
timeout: Duration,
/// Number of operation retries.
retries: u8,
retries: u16,
}

impl Default for PersistentStoreBuilder {
Expand Down Expand Up @@ -79,7 +79,7 @@ impl PersistentStoreBuilder {
}

/// Sets number of operation retries.
pub fn with_retries(mut self, retries: u8) -> Self {
pub fn with_retries(mut self, retries: u16) -> Self {
self.retries = retries;
self
}
Expand All @@ -101,7 +101,7 @@ pub struct PersistentStore {
/// Operation timeout.
timeout: Duration,
/// Number of operation retries.
retries: u8,
retries: u16,
}

/// Persistent store global instance.
Expand Down Expand Up @@ -304,7 +304,7 @@ impl PersistentStore {
}

/// Gets the number of operation retries.
pub fn retries() -> u8 {
pub fn retries() -> u16 {
Self::instance().lock().retries
}

Expand Down

0 comments on commit 88d1cc3

Please sign in to comment.