Skip to content

Commit

Permalink
add select model
Browse files · Browse the repository at this point in the history
  • Loading branch information
michaelfeil committed Jan 18, 2024
1 parent c4d311d commit 568b9d3
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 3 deletions.
1 change: 1 addition & 0 deletions libs/infinity_emb/infinity_emb/inference/select_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def select_model(
loaded_engine = unloaded_engine.value(model_name_or_path, device=device.value)

min_inference_t = 4e-3
max_inference_t = 4e-3
if model_warmup:
# size one, warm up warm start timings.
loaded_engine.warmup(batch_size=batch_size, n_tokens=1)
Expand Down
4 changes: 2 additions & 2 deletions libs/infinity_emb/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion libs/infinity_emb/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ ctranslate2 = {version = "^3.21.0", optional=true}
optimum = {version = "^1.13.2", optional=true}
fastembed = {version = "0.1.2", optional=true}
onnxruntime-gpu = {version = "*", optional=true}
onnxruntime = {version = "*", optional=true}
# cache
diskcache = {version = "*", optional=true}

Expand Down Expand Up @@ -60,7 +61,7 @@ ct2=["ctranslate2","sentence-transformers","torch"]
optimum=["optimum"]
fastembed=["fastembed"]
torch=["sentence-transformers","torch"]
onnxruntime-gpu=["onnxruntime-gpu"]
onnxruntime=["onnxruntime-gpu","onnxruntime"]
logging=["rich"]
cache=["diskcache"]
server=["fastapi", "pydantic", "orjson", "prometheus-fastapi-instrumentator", "uvicorn", "typer","rich"]
Expand Down

0 comments on commit 568b9d3

Please sign in to comment.