-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest-deploy.yaml
29 lines (29 loc) · 998 Bytes
/
test-deploy.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Helm values overrides used for a local test deployment.
image:
  # You can use the minikube Registry Aliases addon to set up the registry
  # alias example.com in a local test cluster:
  # https://minikube.sigs.k8s.io/docs/handbook/addons/registry-aliases/
  repository: example.com
  tag: latest
  pullPolicy: Always

# Extends the plugin timeout so the model can be downloaded during startup.
# NOTE(review): this was previously documented as 60 seconds; 1200000 would be
# 20 minutes if the unit is milliseconds -- confirm the unit and intended value.
pluginTimeout: 1200000

startupProbe:
  failureThreshold: 30

resources:
  requests:
    memory: "350M"
  limits:
    memory: "400M"

appConfig:
  modelList:
    # Use an alternative model for embedding.
    - name: Xenova/bge-small-en-v1.5
      # You can set `dtype` to select the precision of the model.
      # Available values: "fp32" | "fp16" | "q8" | "int8" | "uint8" | "q4" | "bnb4" | "q4f16"
      dtype: "q8"
      # Optional: set the max length of the input text.
      # If not set, the value in the model config will be used.
      # If the model config does not have max_length, the default value (512)
      # will be used.
      max_length: 512
      extraction_config:
        pooling: "mean"
        normalize: true