Commit
infinity - Update config.yaml (#383)
Pins version of infinity
michaelfeil authored Dec 19, 2024
1 parent 6586093 commit cee381f
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions custom-server/infinity-embedding-server/config.yaml
@@ -1,16 +1,25 @@
 base_image:
   image: python:3.11-slim
 docker_server:
-  start_command: sh -c "infinity_emb v2 --model-id BAAI/bge-small-en-v1.5"
+  start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) infinity_emb v2 --batch-size 64 --model-id BAAI/bge-small-en-v1.5 --revision main"
   readiness_endpoint: /health
   liveness_endpoint: /health
   predict_endpoint: /embeddings
   server_port: 7997
+build_commands: # optional step to download the weights of the model into the image
+- sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) infinity_emb v2 --preload-only --no-model-warmup --model-id BAAI/bge-small-en-v1.5 --revision main"
 resources:
   accelerator: L4
   use_gpu: true
 model_name: infinity-embedding-server
 requirements:
-- infinity-emb[all]
+- infinity-emb[all]==0.0.72
 runtime:
   predict_concurrency : 40
+environment_variables:
+  hf_access_token: null
+  # constrain api to at most 256 sentences per request, for better load-balancing
+  INFINITY_MAX_CLIENT_BATCH_SIZE: 256
+  # constrain model to a max backpressure of INFINITY_MAX_CLIENT_BATCH_SIZE * predict_concurrency = 10241 requests
+  INFINITY_QUEUE_SIZE: 10241
+  DO_NOT_TRACK: 1
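
With this config, infinity serves an OpenAI-style embeddings API on `server_port: 7997` at the `predict_endpoint: /embeddings`. Below is a minimal sketch of a client request, assuming the server is reachable at `http://localhost:7997`; when the Truss is deployed, substitute your deployment's URL and any required auth headers. The payload fields (`model`, `input`) and the `data[].embedding` response shape follow infinity's OpenAI-compatible schema.

```python
# Minimal sketch: call the /embeddings endpoint exposed by this config.
# Assumes a locally reachable server on port 7997 (the server_port above).
import requests

resp = requests.post(
    "http://localhost:7997/embeddings",
    json={
        "model": "BAAI/bge-small-en-v1.5",
        "input": ["What is deep learning?", "A second sentence to embed."],
    },
    timeout=30,
)
resp.raise_for_status()
embeddings = [item["embedding"] for item in resp.json()["data"]]
print(len(embeddings), len(embeddings[0]))  # number of inputs, embedding dimension
```

Note that `INFINITY_MAX_CLIENT_BATCH_SIZE: 256` caps how many sentences a single request like the one above may carry, while `INFINITY_QUEUE_SIZE` bounds the total backpressure across concurrent requests.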
