Skip to content

Commit

Permalink
feat: Add e2e latency and plotting script
Browse files Browse the repository at this point in the history
  • Loading branch information
Hugoch committed Sep 17, 2024
1 parent 1ed5dbf commit 6f7f9d1
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 10 deletions.
18 changes: 9 additions & 9 deletions plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,35 +20,34 @@ def plot():
constant_vus = [result for result in results_filtered if result['executor_type'] == 'ConstantVUs']
constant_vus_x = [result['config']['vus'] for result in constant_vus]
if len(constant_rate) > 0:
plot_inner('Requests/s', constant_rate_x, constant_rate, 'Constant Rate')
plot_inner('Requests/s', constant_rate_x, constant_rate, 'Constant Rate benchmark')
if len(constant_vus) > 0:
plot_inner('VUs', constant_vus_x, constant_vus, 'Constant VUs')
plot_inner('VUs', constant_vus_x, constant_vus, 'Constant VUs benchmark')


def plot_inner(x_name, x_values, results, title):
def plot_inner(x_name, x_values, results, chart_title):
fig, axs = plt.subplots(3, 2, figsize=(15, 20))
fig.tight_layout(pad=6.0)
fig.subplots_adjust(hspace=0.4, wspace=0.2, bottom=0.15)
fig.subplots_adjust(hspace=0.2, wspace=0.2, bottom=0.15, top=0.92)
# compute error rate
for result in results:
result['error_rate'] = result['failed_requests'] / (
result['failed_requests'] + result['successful_requests']) * 100.0

metrics = ['inter_token_latency_ms_p90', 'time_to_first_token_ms_p90', 'token_throughput_secs',
metrics = ['inter_token_latency_ms_p90', 'time_to_first_token_ms_p90', 'e2e_latency_ms_p90', 'token_throughput_secs',
'successful_requests', 'error_rate']

titles = ['Inter Token Latency P90 (lower is better)', 'TTFT P90 (lower is better)',
titles = ['Inter Token Latency P90 (lower is better)', 'TTFT P90 (lower is better)', 'End to End Latency P90 (lower is better)',
'Token Throughput (higher is better)', 'Successful requests', 'Error Rate % (lower is better)']

labels = ['Time (ms)', 'Time (ms)', 'Tokens/s', 'Count', '%']
labels = ['Time (ms)', 'Time (ms)', 'Time (ms)', 'Tokens/s', 'Count', '%']

x = [result['config']['rate'] for result in results]
colors = ['#FF9D00', '#2F5BA1']

# Plot each metric in its respective subplot
for ax, metric, title, label in zip(axs.flatten(), metrics, titles, labels):
data = list(map(lambda result: result[metric], results))
ax.plot(x, data, marker='o', color=colors[0])
ax.plot(x_values, data, marker='o', color=colors[0])
ax.set_title(title)
ax.tick_params(axis='x', rotation=0)
ax.set_ylabel(label)
Expand All @@ -64,6 +63,7 @@ def plot_inner(x_name, x_values, results, title):
# Add grid lines for better readability
ax.grid(True, which='both', axis='y', linestyle='--', linewidth=0.5)
ax.set_axisbelow(True) # Ensure grid lines are below the bars
plt.suptitle(chart_title, fontsize=16)

plt.show()

Expand Down
6 changes: 6 additions & 0 deletions src/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,9 @@ pub struct BenchmarkResultsWriter {
successful_requests: u64,
request_rate: f64,
total_tokens_sent: u64,
e2e_latency_ms_avg: u128,
e2e_latency_ms_p90: u128,
e2e_latency_ms_p95: u128,
}

impl BenchmarkResultsWriter {
Expand All @@ -395,6 +398,9 @@ impl BenchmarkResultsWriter {
successful_requests: results.successful_requests() as u64,
request_rate: results.successful_request_rate()?,
total_tokens_sent: results.total_tokens_sent(),
e2e_latency_ms_avg: results.e2e_latency_avg().ok().unwrap().as_millis(),
e2e_latency_ms_p90: results.e2e_latency_percentile(0.9)?.as_millis(),
e2e_latency_ms_p95: results.e2e_latency_percentile(0.95)?.as_millis(),
})
}
}
Expand Down
17 changes: 17 additions & 0 deletions src/requests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -447,4 +447,21 @@ impl TextGenerationAggregatedResponse {
}
}
}
/// End-to-end latency of this request: the elapsed time between the
/// recorded start and end timestamps.
///
/// Returns `None` when either timestamp is missing (the request never
/// started or never completed), so callers can distinguish "no data"
/// from a zero-duration response.
pub fn e2e_latency(&self) -> Option<std::time::Duration> {
    // `zip` yields a pair only when both timestamps are present,
    // replacing the previous nested-match pyramid with the same semantics.
    self.start_time
        .zip(self.end_time)
        .map(|(start_time, end_time)| end_time - start_time)
}
}
26 changes: 25 additions & 1 deletion src/results.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,29 @@ impl BenchmarkResults {
}
}

/// Average end-to-end latency across successful responses that actually
/// recorded both timestamps.
///
/// # Errors
/// Returns `NoResponses` when the benchmark is not ready or no successful
/// response carries an e2e latency.
pub fn e2e_latency_avg(&self) -> anyhow::Result<std::time::Duration> {
    if self.is_ready() {
        // Collect only responses with a measurable latency. Previously,
        // missing latencies counted as zero AND the sum was divided by
        // total_requests() (failed requests included), which understated
        // the average.
        let latencies: Vec<std::time::Duration> = self
            .get_successful_responses()
            .iter()
            .filter_map(|response| response.e2e_latency())
            .collect();
        if latencies.is_empty() {
            // Guards the division below against a zero denominator.
            return Err(anyhow::anyhow!(NoResponses));
        }
        let total_time: std::time::Duration = latencies.iter().sum();
        Ok(total_time / latencies.len() as u32)
    } else {
        Err(anyhow::anyhow!(NoResponses))
    }
}

/// End-to-end latency at the given percentile (e.g. `0.9` for p90)
/// across successful responses that recorded both timestamps.
///
/// # Errors
/// Returns `NoResponses` when the benchmark is not ready or there are no
/// latencies to compute a percentile from.
pub fn e2e_latency_percentile(&self, percentile: f64) -> anyhow::Result<std::time::Duration> {
    if self.is_ready() {
        // Drop responses without a latency instead of substituting zero,
        // which previously dragged low percentiles toward 0.
        let mut times: Vec<std::time::Duration> = self
            .get_successful_responses()
            .iter()
            .filter_map(|response| response.e2e_latency())
            .collect();
        if times.is_empty() {
            // Previously `times[index]` would panic on an empty vector.
            return Err(anyhow::anyhow!(NoResponses));
        }
        // Unstable sort is faster and equal durations need no stable order.
        times.sort_unstable();
        // Clamp so percentile == 1.0 maps to the last element instead of
        // indexing one past the end of the vector.
        let index = ((percentile * times.len() as f64) as usize).min(times.len() - 1);
        Ok(times[index])
    } else {
        Err(anyhow::anyhow!(NoResponses))
    }
}

pub fn time_to_first_token_avg(&self) -> anyhow::Result<std::time::Duration> {
if self.is_ready() {
let mut total_time = std::time::Duration::new(0, 0);
Expand Down Expand Up @@ -172,13 +195,14 @@ impl Debug for BenchmarkResults {
.field("end_time", &self.end_time())
.field("total_tokens", &self.total_tokens())
.field("token_throughput_secs", &self.token_throughput_secs().or::<anyhow::Result<f64>>(Ok(-1.0)))
.field("duration", &self.duration().or::<anyhow::Result<Duration>>(Ok(Duration::from_secs(0))))
.field("duration_ms", &self.duration().or::<anyhow::Result<Duration>>(Ok(Duration::from_secs(0))))
.field("average_time_to_first_token", &self.time_to_first_token_avg().or::<anyhow::Result<Duration>>(Ok(Duration::from_secs(0))))
.field("average_inter_token_latency", &self.inter_token_latency_avg().or::<anyhow::Result<Duration>>(Ok(Duration::from_secs(0))))
.field("failed_requests", &self.failed_requests())
.field("successful_requests", &self.successful_requests())
.field("request_rate", &self.successful_request_rate().or::<anyhow::Result<f64>>(Ok(-1.0)))
.field("sent_prompt_tokens", &self.total_tokens_sent())
.field("e2e_latency_avg", &self.e2e_latency_avg().or::<anyhow::Result<Duration>>(Ok(Duration::from_secs(0))))
.finish()
}
}
Expand Down

0 comments on commit 6f7f9d1

Please sign in to comment.