diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index ef098e7693fa..37ce026299da 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -2040,6 +2040,10 @@ async fn timeline_compact_handler(
         .as_ref()
         .map(|r| r.sub_compaction)
         .unwrap_or(false);
+    let sub_compaction_max_job_size_mb = compact_request
+        .as_ref()
+        .and_then(|r| r.sub_compaction_max_job_size_mb);
+
     let options = CompactOptions {
         compact_key_range: compact_request
             .as_ref()
@@ -2049,6 +2053,7 @@
             .and_then(|r| r.compact_lsn_range.clone()),
         flags,
         sub_compaction,
+        sub_compaction_max_job_size_mb,
     };
 
     let scheduled = compact_request
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index a6c4aa45229d..47b7d936f559 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -3017,10 +3017,15 @@ impl Tenant {
                     warn!("ignoring scheduled compaction task: scheduled task must be gc compaction: {:?}", next_scheduled_compaction_task.options);
                 } else if next_scheduled_compaction_task.options.sub_compaction {
                     info!("running scheduled enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs");
-                    let jobs = timeline
-                        .gc_compaction_split_jobs(GcCompactJob::from_compact_options(
-                            next_scheduled_compaction_task.options,
-                        ))
+                    let jobs: Vec<GcCompactJob> = timeline
+                        .gc_compaction_split_jobs(
+                            GcCompactJob::from_compact_options(
+                                next_scheduled_compaction_task.options.clone(),
+                            ),
+                            next_scheduled_compaction_task
+                                .options
+                                .sub_compaction_max_job_size_mb,
+                        )
                         .await
                         .map_err(CompactionError::Other)?;
                     if jobs.is_empty() {
@@ -3043,6 +3048,7 @@
                                 sub_compaction: false,
                                 compact_key_range: Some(job.compact_key_range.into()),
                                 compact_lsn_range: Some(job.compact_lsn_range.into()),
+                                sub_compaction_max_job_size_mb: None,
                             };
                             tline_pending_tasks.push_back(if idx == jobs_len - 1 {
                                 ScheduledCompactionTask {
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 294a4ad9e5f5..b5c707922668 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -788,6 +788,8 @@ pub(crate) struct CompactRequest {
     /// Whether the compaction job should be split across key ranges.
     #[serde(default)]
     pub sub_compaction: bool,
+    /// Max job size for each subcompaction job.
+    pub sub_compaction_max_job_size_mb: Option<u64>,
 }
 
 #[serde_with::serde_as]
@@ -859,6 +861,9 @@ pub(crate) struct CompactOptions {
     /// Enable sub-compaction (split compaction job across key ranges).
     /// This option is only used by GC compaction.
     pub sub_compaction: bool,
+    /// Set job size for the GC compaction.
+    /// This option is only used by GC compaction.
+    pub sub_compaction_max_job_size_mb: Option<u64>,
 }
 
 impl std::fmt::Debug for Timeline {
@@ -1683,6 +1688,7 @@ impl Timeline {
                 compact_key_range: None,
                 compact_lsn_range: None,
                 sub_compaction: false,
+                sub_compaction_max_job_size_mb: None,
             },
             ctx,
         )
diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs
index a2addb0b5996..a218623f9620 100644
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -1805,12 +1805,19 @@ impl Timeline {
     pub(crate) async fn gc_compaction_split_jobs(
         self: &Arc<Self>,
         job: GcCompactJob,
+        sub_compaction_max_job_size_mb: Option<u64>,
     ) -> anyhow::Result<Vec<GcCompactJob>> {
         let compact_below_lsn = if job.compact_lsn_range.end != Lsn::MAX {
             job.compact_lsn_range.end
         } else {
             *self.get_latest_gc_cutoff_lsn() // use the real gc cutoff
         };
+
+        // Split compaction job to about 4GB each
+        const GC_COMPACT_MAX_SIZE_MB: u64 = 4 * 1024;
+        let sub_compaction_max_job_size_mb =
+            sub_compaction_max_job_size_mb.unwrap_or(GC_COMPACT_MAX_SIZE_MB);
+
         let mut compact_jobs = Vec::new();
         // For now, we simply use the key partitioning information; we should do a more fine-grained partitioning
         // by estimating the amount of files read for a compaction job. We should also partition on LSN.
@@ -1857,8 +1864,6 @@
         let guard = self.layers.read().await;
         let layer_map = guard.layer_map()?;
         let mut current_start = None;
-        // Split compaction job to about 2GB each
-        const GC_COMPACT_MAX_SIZE_MB: u64 = 4 * 1024; // 4GB, TODO: should be configuration in the future
         let ranges_num = split_key_ranges.len();
         for (idx, (start, end)) in split_key_ranges.into_iter().enumerate() {
             if current_start.is_none() {
@@ -1871,7 +1876,7 @@
             }
             let res = layer_map.range_search(start..end, compact_below_lsn);
             let total_size = res.found.keys().map(|x| x.layer.file_size()).sum::<u64>();
-            if total_size > GC_COMPACT_MAX_SIZE_MB * 1024 * 1024 || ranges_num == idx + 1 {
+            if total_size > sub_compaction_max_job_size_mb * 1024 * 1024 || ranges_num == idx + 1 {
                 // Try to extend the compaction range so that we include at least one full layer file.
                 let extended_end = res
                     .found
@@ -1924,10 +1929,12 @@
         ctx: &RequestContext,
     ) -> anyhow::Result<()> {
         let sub_compaction = options.sub_compaction;
-        let job = GcCompactJob::from_compact_options(options);
+        let job = GcCompactJob::from_compact_options(options.clone());
         if sub_compaction {
             info!("running enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs");
-            let jobs = self.gc_compaction_split_jobs(job).await?;
+            let jobs = self
+                .gc_compaction_split_jobs(job, options.sub_compaction_max_job_size_mb)
+                .await?;
             let jobs_len = jobs.len();
             for (idx, job) in jobs.into_iter().enumerate() {
                 info!(
diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py
index 1eceb6276bce..88873c63c24c 100644
--- a/test_runner/regress/test_compaction.py
+++ b/test_runner/regress/test_compaction.py
@@ -153,6 +153,7 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder):
         if i % 10 == 0:
             log.info(f"Running churn round {i}/{churn_rounds} ...")
 
+        if (i - 1) % 10 == 0:
             # Run gc-compaction every 10 rounds to ensure the test doesn't take too long time.
             ps_http.timeline_compact(
                 tenant_id,
@@ -165,6 +166,7 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder):
                     "start": "000000000000000000000000000000000000",
                     "end": "030000000000000000000000000000000000",
                 },
+                "sub_compaction_max_job_size_mb": 16,
            },
         )
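
For context, here is a minimal sketch of what the new knob looks like when driven end to end against the pageserver management API with `requests`. The endpoint path, the `enhanced_gc_bottom_most_compaction` query parameter, the port, and the placeholder IDs are assumptions made for illustration (the regression test goes through its `ps_http.timeline_compact(...)` helper instead); the JSON body mirrors the one used in `test_pageserver_gc_compaction_smoke`.

```python
# Sketch only: trigger a scheduled gc-compaction with sub-compaction enabled and a
# custom per-job size cap. URL, query parameter, port, and IDs are assumed values.
import requests

PAGESERVER_HTTP = "http://127.0.0.1:9898"  # assumed pageserver management API address
TENANT_ID = "<tenant_id>"                  # placeholder
TIMELINE_ID = "<timeline_id>"              # placeholder

resp = requests.put(
    f"{PAGESERVER_HTTP}/v1/tenant/{TENANT_ID}/timeline/{TIMELINE_ID}/compact",
    params={"enhanced_gc_bottom_most_compaction": "true"},
    json={
        "scheduled": True,
        "sub_compaction": True,
        "compact_key_range": {
            "start": "000000000000000000000000000000000000",
            "end": "030000000000000000000000000000000000",
        },
        # New field from this patch: cap each sub-compaction job at roughly 16 MB
        # of input layer data; leave it out to fall back to the default.
        "sub_compaction_max_job_size_mb": 16,
    },
)
resp.raise_for_status()
```

Omitting `sub_compaction_max_job_size_mb` keeps the previous behaviour: `gc_compaction_split_jobs` falls back to the built-in `GC_COMPACT_MAX_SIZE_MB` default of 4 GB per job.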