Skip to content

Commit

Permalink
set health check req to high priority
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Dec 2, 2024
1 parent 8a859b9 commit 35f2d56
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 3 deletions.
4 changes: 3 additions & 1 deletion llgtrt/src/routes/health_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,13 @@ pub async fn model_check(
headers: HeaderMap,
State(app_state): State<Arc<AppState>>,
) -> Result<Response, AppError> {
let req: CompletionCreateParams = serde_json::from_value(json!({
let mut req: CompletionCreateParams = serde_json::from_value(json!({
"model": "model",
"prompt": "Hi",
"max_tokens": 2
}))?;
// set very high priority for this request, so that it returns quickly
req.params.priority = Some(10.0);
let resp = completions::route_completions(headers, State(app_state), Json(req)).await?;
let status = resp.status();
let body = axum::body::to_bytes(resp.into_body(), 1024 * 1024).await?;
Expand Down
1 change: 1 addition & 0 deletions llgtrt/src/routes/openai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ pub struct CommonCreateParams {
pub logprobs: Option<usize>,

/// Defaults to 0.5. It is not accepted in JSON requests, but it can be set internally.
/// Setting it to a higher value, such as 1.0 or 10.0, will make the request complete faster.
#[serde(skip)]
pub priority: Option<f32>,
}
Expand Down
2 changes: 1 addition & 1 deletion llguidance
5 changes: 4 additions & 1 deletion scripts/req.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/env python3

import requests
import os
import threading
Expand Down Expand Up @@ -247,7 +249,7 @@ def one_round():


def main():
random.seed(0)
# random.seed(0)
parser = argparse.ArgumentParser()
parser.add_argument("--max_threads", type=int, default=0)
parser.add_argument("--sessions", type=int, default=0)
Expand All @@ -259,6 +261,7 @@ def main():
LLG = True
NUM_THREADS = args.sessions
PROMPT_SIZE = 2600
PROMPT_SIZE = 40_000
NUM_REPS = 1
NUM_JOKES = 100
MAX_TOKENS = 4000
Expand Down
4 changes: 4 additions & 0 deletions scripts/test-infer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ curl -X POST "${TRT_API_BASE}chat/completions" \
curl -v "${TRT_API_BASE}health/live"
;;

health)
curl -v "${TRT_API_BASE}health/model"
;;

ready)
curl -v "${TRT_API_BASE}health/ready"
;;
Expand Down

0 comments on commit 35f2d56

Please sign in to comment.