Skip to content

Commit

Permalink
add select model
Browse files · Browse the repository at this point in the history
  • Loading branch information
michaelfeil committed Jan 18, 2024
1 parent c4d311d commit 568b9d3
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 3 deletions.
1 change: 1 addition & 0 deletions libs/infinity_emb/infinity_emb/inference/select_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def select_model(
loaded_engine = unloaded_engine.value(model_name_or_path, device=device.value)

min_inference_t = 4e-3
max_inference_t = 4e-3
if model_warmup:
# size one, warm up warm start timings.
loaded_engine.warmup(batch_size=batch_size, n_tokens=1)
Expand Down
4 changes: 2 additions & 2 deletions libs/infinity_emb/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion libs/infinity_emb/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ ctranslate2 = {version = "^3.21.0", optional=true}
optimum = {version = "^1.13.2", optional=true}
fastembed = {version = "0.1.2", optional=true}
onnxruntime-gpu = {version = "*", optional=true}
onnxruntime = {version = "*", optional=true}
# cache
diskcache = {version = "*", optional=true}

Expand Down Expand Up @@ -60,7 +61,7 @@ ct2=["ctranslate2","sentence-transformers","torch"]
optimum=["optimum"]
fastembed=["fastembed"]
torch=["sentence-transformers","torch"]
onnxruntime-gpu=["onnxruntime-gpu"]
onnxruntime=["onnxruntime-gpu","onnxruntime"]
logging=["rich"]
cache=["diskcache"]
server=["fastapi", "pydantic", "orjson", "prometheus-fastapi-instrumentator", "uvicorn", "typer","rich"]
Expand Down

0 comments on commit 568b9d3

Please sign in to comment.