# feat(prover): Add support for scaling WGs and compressor (#3179)
## What ❔

Add support for scaling WGs and compressor. Clean up unneeded pods in Agents. Include `in_progress` into the queue.

## Why ❔

## Checklist

- [x] PR title corresponds to the body of PR (we generate changelog entries from PRs).
- [x] Tests for the changes have been added / updated.
- [ ] Documentation comments have been added / updated.
- [x] Code has been formatted via `zkstack dev fmt` and `zkstack dev lint`.

ref ZKD-1855
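The "include `in_progress` into the queue" point shows up in the diff below as a switch from reading only `report.prover_jobs.queued` to calling `.all()` on each job-count field. The real counter type lives in the prover-job-monitor crate; the sketch below is only an illustration of what such an `all()` helper amounts to, with a hypothetical `JobCounts` struct standing in for the real type.

```rust
// Illustration only: `JobCounts` is a hypothetical stand-in for the job-count
// type reported by prover-job-monitor; the real type and its fields may differ.
#[derive(Debug, Default, Clone, Copy)]
pub struct JobCounts {
    pub queued: u64,
    pub in_progress: u64,
}

impl JobCounts {
    /// Work the autoscaler should provision for: jobs still waiting in the
    /// queue plus jobs already picked up by a worker.
    pub fn all(&self) -> u64 {
        self.queued + self.in_progress
    }
}
```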
Showing 9 changed files with 575 additions and 56 deletions.
```diff
@@ -1,49 +1,76 @@
-use std::collections::HashMap;
+use std::{collections::HashMap, ops::Deref};
 
 use anyhow::{Context, Ok};
 use reqwest::Method;
-use zksync_prover_job_monitor::autoscaler_queue_reporter::VersionedQueueReport;
+use zksync_config::configs::prover_autoscaler::QueueReportFields;
+use zksync_prover_job_monitor::autoscaler_queue_reporter::{QueueReport, VersionedQueueReport};
 use zksync_utils::http_with_retries::send_request_with_retries;
 
 use crate::metrics::{AUTOSCALER_METRICS, DEFAULT_ERROR_CODE};
 
 const MAX_RETRIES: usize = 5;
 
 #[derive(Debug)]
-pub struct Queue {
-    pub queue: HashMap<String, u64>,
+pub struct Queue(HashMap<(String, QueueReportFields), u64>);
+
+impl Deref for Queue {
+    type Target = HashMap<(String, QueueReportFields), u64>;
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
 }
 
 #[derive(Default)]
 pub struct Queuer {
     pub prover_job_monitor_url: String,
 }
 
+fn target_to_queue(target: &QueueReportFields, report: &QueueReport) -> u64 {
+    let res = match target {
+        QueueReportFields::basic_witness_jobs => report.basic_witness_jobs.all(),
+        QueueReportFields::leaf_witness_jobs => report.leaf_witness_jobs.all(),
+        QueueReportFields::node_witness_jobs => report.node_witness_jobs.all(),
+        QueueReportFields::recursion_tip_witness_jobs => report.recursion_tip_witness_jobs.all(),
+        QueueReportFields::scheduler_witness_jobs => report.scheduler_witness_jobs.all(),
+        QueueReportFields::proof_compressor_jobs => report.proof_compressor_jobs.all(),
+        QueueReportFields::prover_jobs => report.prover_jobs.all(),
+    };
+    res as u64
+}
+
 impl Queuer {
     pub fn new(pjm_url: String) -> Self {
         Self {
             prover_job_monitor_url: pjm_url,
         }
     }
 
-    pub async fn get_queue(&self) -> anyhow::Result<Queue> {
+    /// Requests queue report from prover-job-monitor and parse it into Queue HashMap for provided
+    /// list of jobs.
+    pub async fn get_queue(&self, jobs: &[QueueReportFields]) -> anyhow::Result<Queue> {
         let url = &self.prover_job_monitor_url;
         let response = send_request_with_retries(url, MAX_RETRIES, Method::GET, None, None).await;
         let response = response.map_err(|err| {
             AUTOSCALER_METRICS.calls[&(url.clone(), DEFAULT_ERROR_CODE)].inc();
-            anyhow::anyhow!("Failed fetching queue from url: {url}: {err:?}")
+            anyhow::anyhow!("Failed fetching queue from URL: {url}: {err:?}")
         })?;
 
         AUTOSCALER_METRICS.calls[&(url.clone(), response.status().as_u16())].inc();
         let response = response
             .json::<Vec<VersionedQueueReport>>()
             .await
             .context("Failed to read response as json")?;
-        Ok(Queue {
-            queue: response
+        Ok(Queue(
+            response
                 .iter()
-                .map(|x| (x.version.to_string(), x.report.prover_jobs.queued as u64))
+                .flat_map(|versioned_report| {
+                    jobs.iter().map(move |j| {
+                        (
+                            (versioned_report.version.to_string(), j.clone()),
+                            target_to_queue(j, &versioned_report.report),
+                        )
+                    })
+                })
                 .collect::<HashMap<_, _>>(),
-        })
+        ))
     }
 }
```
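For context, here is a minimal sketch of how the reworked API might be consumed. It is not part of the commit: it assumes a tokio runtime, that `Queuer` is in scope from the autoscaler crate shown above, that `QueueReportFields` derives `Debug`, and a placeholder prover-job-monitor URL.

```rust
use zksync_config::configs::prover_autoscaler::QueueReportFields;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Placeholder URL: point this at the real prover-job-monitor queue endpoint.
    let queuer = Queuer::new("http://prover-job-monitor.local/queue_report".to_string());

    // Request only the job types this scaler instance cares about.
    let jobs = [
        QueueReportFields::prover_jobs,
        QueueReportFields::proof_compressor_jobs,
    ];
    let queue = queuer.get_queue(&jobs).await?;

    // `Queue` now derefs to the underlying map, so entries are keyed by
    // (protocol version, job type) instead of version alone.
    for ((version, job), count) in queue.iter() {
        println!("{version} / {job:?}: {count} jobs queued or in progress");
    }
    Ok(())
}
```

The `Deref` impl is what lets callers use `queue.iter()` or `queue.get(&(version, job))` directly on the wrapper without exposing the inner `HashMap` mutably.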