-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy path.env.example
53 lines (36 loc) · 1.04 KB
/
.env.example
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# benchmark target: {vllm, tgis}
TARGET=tgis
# minimum number of input tokens
MIN_INPUT_TOKENS=500
# maximum number of input tokens
MAX_INPUT_TOKENS=500
# minimum number of output tokens
MIN_OUTPUT_TOKENS=50
# maximum number of output tokens
MAX_OUTPUT_TOKENS=50
# fraction of greedy requests
FRAC_GREEDY=1.0
# number of input requests to generate (virtual users will sample from these)
SAMPLE_SIZE=1
# Default requests directory
REQUESTS_DIR=/requests
# requests file
REQUESTS_FILENAME=sample_requests.json
# results file
RESULTS_FILENAME=results.json
# filename for combined results
RESULTS_ALL_FILENAME=results_all.json
# number of virtual users
NUM_USERS=1
# comma-separated list of virtual-user counts to sweep over
SWEEP_USERS=1,2,4
# experiment duration
DURATION=30s
# if a request fails, we will back off for some time
BACKOFF=1s
# we allow some grace period for requests to finish when experiment ends
GRACE_PERIOD=5s
# URL for inference server endpoint
# for vLLM this should look like: $(IP_ADDRESS):8000
# and for TGIS this should look like: $(IP_ADDRESS):8033
URL=