Skip to content

Commit

Permalink
Stop CNPG reconciliation for hibernated instances (#1038)
Browse files Browse the repository at this point in the history
  • Loading branch information
vrmiguel authored Nov 11, 2024
1 parent 42204b3 commit 9997dd9
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 3 deletions.
2 changes: 2 additions & 0 deletions conductor/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

139 changes: 136 additions & 3 deletions tembo-operator/src/cloudnativepg/hibernate.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::apis::coredb_types::CoreDB;
use crate::cloudnativepg::clusters::{ClusterStatusConditions, ClusterStatusConditionsStatus};
use crate::cloudnativepg::cnpg::{get_cluster, get_pooler, get_scheduled_backups};
use crate::cloudnativepg::poolers::Pooler;
use crate::cloudnativepg::scheduledbackups::ScheduledBackup;
Expand All @@ -21,6 +22,8 @@ use std::sync::Arc;
use std::time::Duration;
use tracing::{debug, error, info, warn};

use super::clusters::Cluster;

/// Resolves hibernation in the Cluster and related services of the CoreDB
///
/// If the cluster is in spec.stop state, this will activate the CNPG hibernation
Expand Down Expand Up @@ -142,18 +145,31 @@ pub async fn reconcile_cluster_hibernation(cdb: &CoreDB, ctx: &Arc<Context>) ->
}
}

// Build the hibernation patch we want to apply to disable the CNPG cluster.
// Stop CNPG reconciliation for hibernated instances.
// We should not stop CNPG reconciliation until hibernation is fully completed,
// as the instance may not finish hibernating otherwise.
//
// Disabling reconciliation for stopped instances is important because, as the number
// of stopped instances grows, reconciliation performance is significantly impacted
let stop_cnpg_reconciliation = cdb.spec.stop && is_cluster_hibernated(&cluster);
let stop_cnpg_reconciliation_value = if stop_cnpg_reconciliation {
"disabled"
} else {
"enabled"
};

let cluster_annotations = cluster.metadata.annotations.unwrap_or_default();
let hibernation_value = if cdb.spec.stop { "on" } else { "off" };

// Build the hibernation patch we want to apply to disable the CNPG cluster.
let patch_hibernation_annotation = json!({
"metadata": {
"annotations": {
"cnpg.io/hibernation": hibernation_value
"cnpg.io/hibernation": hibernation_value,
"cnpg.io/reconciliationLoop": stop_cnpg_reconciliation_value,
}
}
});

// Update ScheduledBackup if it exists
if let Err(action) = update_scheduled_backups(&scheduled_backups, cdb, ctx).await {
warn!(
Expand Down Expand Up @@ -333,3 +349,120 @@ async fn update_scheduled_backups(

Ok(())
}

fn is_cluster_hibernated(cluster: &Cluster) -> bool {
fn get_hibernation_condition(cluster: &Cluster) -> Option<&ClusterStatusConditions> {
cluster
.status
.as_ref()?
.conditions
.as_ref()?
.iter()
.find(|condition| condition.r#type == "cnpg.io/hibernation")
}

get_hibernation_condition(cluster)
.map(|condition| condition.status == ClusterStatusConditionsStatus::True)
.unwrap_or(
// If we did not find a cnpg.io/hibernation annotation, likely the cluster has never been hibernated
false,
)
}

#[cfg(test)]
mod tests {
use kube::api::ObjectMeta;

use crate::cloudnativepg::{
clusters::{
Cluster, ClusterSpec, ClusterStatus, ClusterStatusConditions,
ClusterStatusConditionsStatus,
},
hibernate::is_cluster_hibernated,
};

#[test]
fn test_is_cluster_hibernated() {
// Not hibernated yet: still in progress
assert_eq!(is_cluster_hibernated(&hibernation_in_progress()), false);
// Not hibernated: unrelated condition
assert_eq!(is_cluster_hibernated(&backed_up_cluster()), false);
// Hibernated: "type" is "cnpg.io/hibernation" and "status" is "True"
assert!(is_cluster_hibernated(&hibernation_completed()));
}

fn hibernation_in_progress() -> Cluster {
Cluster {
metadata: ObjectMeta {
name: Some("test-cluster".to_string()),
namespace: Some("test".to_string()),
..ObjectMeta::default()
},
spec: ClusterSpec {
..Default::default()
},
status: Some(ClusterStatus {
instances: Some(1),
conditions: Some(vec![ClusterStatusConditions {
last_transition_time: "2024-11-11T19:33:58Z".into(),
message: "Hibernation is in progress".into(),
observed_generation: None,
reason: "DeletingPods".into(),
status: ClusterStatusConditionsStatus::False,
r#type: "cnpg.io/hibernation".into(),
}]),
..ClusterStatus::default()
}),
}
}

fn hibernation_completed() -> Cluster {
Cluster {
metadata: ObjectMeta {
name: Some("test-cluster".to_string()),
namespace: Some("test".to_string()),
..ObjectMeta::default()
},
spec: ClusterSpec {
..Default::default()
},
status: Some(ClusterStatus {
instances: Some(1),
conditions: Some(vec![ClusterStatusConditions {
last_transition_time: "2024-11-11T19:33:58Z".into(),
message: "Cluster has been hibernated".into(),
observed_generation: None,
reason: "Hibernated".into(),
status: ClusterStatusConditionsStatus::True,
r#type: "cnpg.io/hibernation".into(),
}]),
..ClusterStatus::default()
}),
}
}

fn backed_up_cluster() -> Cluster {
Cluster {
metadata: ObjectMeta {
name: Some("test-cluster".to_string()),
namespace: Some("test".to_string()),
..ObjectMeta::default()
},
spec: ClusterSpec {
..Default::default()
},
status: Some(ClusterStatus {
instances: Some(1),
conditions: Some(vec![ClusterStatusConditions {
last_transition_time: "2024-11-11T19:33:58Z".into(),
message: "Backup was successful".into(),
observed_generation: None,
reason: "LastBackupSucceeded".into(),
status: ClusterStatusConditionsStatus::True,
r#type: "LastBackupSucceeded".into(),
}]),
..ClusterStatus::default()
}),
}
}
}

0 comments on commit 9997dd9

Please sign in to comment.