Skip to content

Commit

Permalink
fix: use status code 400 when batch is empty (#413)
Browse files: browse the repository at this point in the history
  • Loading branch information
OlivierDehaene authored Oct 17, 2024
1 parent 416efe1 commit 205f96c
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 10 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ jobs:

- name: Extract metadata (tags, labels) for Docker
id: meta-grpc
if: ${{ matrix.grpc }}
uses: docker/metadata-action@v5
with:
images: |
Expand All @@ -142,6 +143,7 @@ jobs:
- name: Build and push Docker image
id: build-and-push-grpc
if: ${{ matrix.grpc }}
uses: docker/build-push-action@v6
with:
context: .
Expand Down
7 changes: 7 additions & 0 deletions .github/workflows/matrix.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"sccache": true,
"cudaComputeCap": 75,
"extraBuildArgs": "DEFAULT_USE_FLASH_ATTENTION=False",
"grpc": true,
"dockerfile": "Dockerfile-cuda"
},
{
Expand All @@ -14,6 +15,7 @@
"runOn": "always",
"sccache": true,
"cudaComputeCap": 80,
"grpc": true,
"dockerfile": "Dockerfile-cuda"
},
{
Expand All @@ -22,6 +24,7 @@
"runOn": "main",
"sccache": true,
"cudaComputeCap": 86,
"grpc": true,
"dockerfile": "Dockerfile-cuda"
},
{
Expand All @@ -30,6 +33,7 @@
"runOn": "main",
"sccache": true,
"cudaComputeCap": 89,
"grpc": true,
"dockerfile": "Dockerfile-cuda"
},
{
Expand All @@ -38,20 +42,23 @@
"runOn": "main",
"sccache": true,
"cudaComputeCap": 90,
"grpc": true,
"dockerfile": "Dockerfile-cuda"
},
{
"name": "All",
"imageNamePrefix": "cuda-",
"runOn": "main",
"sccache": false,
"grpc": false,
"dockerfile": "Dockerfile-cuda-all"
},
{
"name": "cpu",
"imageNamePrefix": "cpu-",
"runOn": "main",
"sccache": true,
"grpc": true,
"dockerfile": "Dockerfile"
}
]
1 change: 1 addition & 0 deletions router/src/grpc/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1534,6 +1534,7 @@ impl From<ErrorResponse> for Status {
ErrorType::Overloaded => Code::ResourceExhausted,
ErrorType::Validation => Code::InvalidArgument,
ErrorType::Tokenizer => Code::FailedPrecondition,
ErrorType::Empty => Code::InvalidArgument,
};

Status::new(code, value.error)
Expand Down
45 changes: 35 additions & 10 deletions router/src/http/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
)
Expand Down Expand Up @@ -285,6 +287,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
)
Expand All @@ -306,7 +310,7 @@ async fn rerank(
tracing::error!("{message}");
let err = ErrorResponse {
error: message,
error_type: ErrorType::Validation,
error_type: ErrorType::Empty,
};
let counter = metrics::counter!("te_request_failure", "err" => "validation");
counter.increment(1);
Expand Down Expand Up @@ -471,6 +475,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
)
Expand All @@ -489,7 +495,7 @@ async fn similarity(
tracing::error!("{message}");
let err = ErrorResponse {
error: message,
error_type: ErrorType::Validation,
error_type: ErrorType::Empty,
};
let counter = metrics::counter!("te_request_failure", "err" => "validation");
counter.increment(1);
Expand Down Expand Up @@ -553,6 +559,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
)
Expand Down Expand Up @@ -615,7 +623,7 @@ async fn embed(
tracing::error!("{message}");
let err = ErrorResponse {
error: message,
error_type: ErrorType::Validation,
error_type: ErrorType::Empty,
};
let counter = metrics::counter!("te_request_failure", "err" => "validation");
counter.increment(1);
Expand Down Expand Up @@ -722,6 +730,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
)
Expand Down Expand Up @@ -792,7 +802,7 @@ async fn embed_sparse(
tracing::error!("{message}");
let err = ErrorResponse {
error: message,
error_type: ErrorType::Validation,
error_type: ErrorType::Empty,
};
let counter = metrics::counter!("te_request_failure", "err" => "validation");
counter.increment(1);
Expand Down Expand Up @@ -900,6 +910,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
)
Expand Down Expand Up @@ -961,7 +973,7 @@ async fn embed_all(
tracing::error!("{message}");
let err = ErrorResponse {
error: message,
error_type: ErrorType::Validation,
error_type: ErrorType::Empty,
};
let counter = metrics::counter!("te_request_failure", "err" => "validation");
counter.increment(1);
Expand Down Expand Up @@ -1067,6 +1079,8 @@ example = json ! ({"message": "Inference failed", "type": "backend"})),
example = json ! ({"message": "Model is overloaded", "type": "overloaded"})),
(status = 422, description = "Tokenization error", body = OpenAICompatErrorResponse,
example = json ! ({"message": "Tokenization error", "type": "tokenizer"})),
(status = 400, description = "Batch is empty", body = OpenAICompatErrorResponse,
example = json ! ({"message": "Batch is empty", "type": "empty"})),
(status = 413, description = "Batch size error", body = OpenAICompatErrorResponse,
example = json ! ({"message": "Batch size error", "type": "validation"})),
)
Expand Down Expand Up @@ -1150,7 +1164,7 @@ async fn openai_embed(
tracing::error!("{message}");
let err = ErrorResponse {
error: message,
error_type: ErrorType::Validation,
error_type: ErrorType::Empty,
};
let counter = metrics::counter!("te_request_failure", "err" => "validation");
counter.increment(1);
Expand Down Expand Up @@ -1265,8 +1279,12 @@ path = "/tokenize",
request_body = TokenizeRequest,
responses(
(status = 200, description = "Tokenized ids", body = TokenizeResponse),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"message": "Tokenization error", "type": "tokenizer"})),
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
)
)]
#[instrument(skip_all)]
Expand Down Expand Up @@ -1327,7 +1345,7 @@ async fn tokenize(
tracing::error!("{message}");
let err = ErrorResponse {
error: message,
error_type: ErrorType::Validation,
error_type: ErrorType::Empty,
};
let counter = metrics::counter!("te_request_failure", "err" => "validation");
counter.increment(1);
Expand Down Expand Up @@ -1377,8 +1395,12 @@ path = "/decode",
request_body = DecodeRequest,
responses(
(status = 200, description = "Decoded ids", body = DecodeResponse),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"message": "Tokenization error", "type": "tokenizer"})),
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
)
)]
#[instrument(skip_all)]
Expand All @@ -1403,7 +1425,7 @@ async fn decode(
tracing::error!("{message}");
let err = ErrorResponse {
error: message,
error_type: ErrorType::Validation,
error_type: ErrorType::Empty,
};
let counter = metrics::counter!("te_request_failure", "err" => "validation");
counter.increment(1);
Expand Down Expand Up @@ -1454,6 +1476,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
)
Expand Down Expand Up @@ -1804,6 +1828,7 @@ impl From<&ErrorType> for StatusCode {
ErrorType::Overloaded => StatusCode::TOO_MANY_REQUESTS,
ErrorType::Tokenizer => StatusCode::UNPROCESSABLE_ENTITY,
ErrorType::Validation => StatusCode::PAYLOAD_TOO_LARGE,
ErrorType::Empty => StatusCode::BAD_REQUEST,
}
}
}
Expand Down
1 change: 1 addition & 0 deletions router/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,7 @@ pub enum ErrorType {
Overloaded,
Validation,
Tokenizer,
Empty,
}

#[derive(Serialize)]
Expand Down

0 comments on commit 205f96c

Please sign in to comment.