
Commit

update
ZeldaHuang committed Dec 19, 2024
1 parent fc18695 commit abb36ab
Showing 4 changed files with 10 additions and 5 deletions.
9 changes: 7 additions & 2 deletions Makefile
@@ -22,7 +22,7 @@ install:
.PHONY: lint
lint: check_pylint_installed check_pytest_installed
@pylint --rcfile=.pylintrc -s n --jobs=128 ./llumnix

@pylint --rcfile=.pylintrc \
--disable=protected-access,super-init-not-called,unused-argument,redefined-outer-name,invalid-name \
-s n --jobs=128 ./tests
@@ -61,22 +61,27 @@ test: check_pytest_installed

.PHONY: unit_test
unit_test: check_pytest_installed
@ray stop
@pytest -v --ignore=third_party/ --ignore=tests/e2e_test --disable-warnings

.PHONY: offline_test
offline_test:
@ray stop
@python examples/offline_inference.py

.PHONY: e2e_test
e2e_test:
@ray stop
@pytest -v -x -s --tb=long ./tests/e2e_test/test_e2e.py

.PHONY: bench_test
bench_test:
@ray stop
@pytest -v -x -s --tb=long ./tests/e2e_test/test_bench.py

.PHONY: migration_test
migration_test:
@ray stop
@pytest -v -x -s --tb=long ./tests/e2e_test/test_migration.py

####################################### test end ########################################
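Each test target now begins with "ray stop", so a Ray cluster left behind by an earlier (possibly crashed) run cannot leak state into the next one. A minimal sketch of the same clean-slate idea as a session-scoped pytest fixture, assuming only that the ray CLI is on PATH (the Makefile targets above do this directly instead):

import subprocess

import pytest

@pytest.fixture(scope="session", autouse=True)
def clean_ray_cluster():
    # Stop any Ray processes left over from a previous run so the session
    # starts against a fresh cluster; check=False tolerates the case where
    # no cluster is currently running.
    subprocess.run(["ray", "stop"], check=False)
    yield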
2 changes: 1 addition & 1 deletion benchmark/benchmark_serving.py
@@ -102,7 +102,7 @@ async def query_model_vllm(prompt, verbose, ip_ports):
if verbose:
print('Done')

- output = await resp.json(content_type='text/plain')
+ output = await resp.json()
# necessary for latency calc
output['response_len'] = expected_response_len
if verbose and 'generated_text' in output:
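Context for this change: aiohttp's resp.json() validates the response Content-Type header and raises aiohttp.ContentTypeError unless the server sends application/json, so the removed content_type='text/plain' argument was a workaround for a server that mislabeled its JSON body. Dropping it suggests the server now sends the proper header, letting the default check suffice; the same fix appears in tests/e2e_test/test_e2e.py below. A small illustrative sketch of the behavior, with a hypothetical URL and payload:

import aiohttp

async def fetch_generation(url, payload):
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload) as resp:
            try:
                # Default: requires Content-Type: application/json.
                return await resp.json()
            except aiohttp.ContentTypeError:
                # Fallback for servers that mislabel JSON bodies, e.g. as
                # text/plain; content_type=None disables the check entirely.
                return await resp.json(content_type=None)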
2 changes: 1 addition & 1 deletion llumnix/entrypoints/bladellm/client.py
@@ -62,7 +62,7 @@ async def background_process_outputs(self):
del self.request_streams[request_id]

async def _add_request(self, request: ServerRequest) -> LLMResponse:
- if request.sampling_params.n > 1:
+ if request.sampling_params.n > 1 or request.sampling_params.use_beam_search:
return error_resp(request.id, err_code=400, err_msg="Unsupported feature: multiple sequence decoding in Llumnix.")

llumnix_id = random.randint(0, 2147483647)  # (1 << 31) - 1
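The tightened guard rejects beam search along with n > 1: both ask the engine for multiple output sequences per prompt, which this Llumnix entrypoint does not support and answers with HTTP 400. A standalone sketch of the predicate, using a hypothetical stand-in type (the real ServerRequest and its sampling params come from BladeLLM):

from dataclasses import dataclass

@dataclass
class SamplingParams:
    n: int = 1
    use_beam_search: bool = False

def is_multi_sequence(params):
    # Both n > 1 and beam search request multiple output sequences per
    # prompt, which the entrypoint rejects with err_code=400.
    return params.n > 1 or params.use_beam_search

assert is_multi_sequence(SamplingParams(n=2))
assert is_multi_sequence(SamplingParams(use_beam_search=True))
assert not is_multi_sequence(SamplingParams())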
2 changes: 1 addition & 1 deletion tests/e2e_test/test_e2e.py
@@ -36,7 +36,7 @@ async def get_llumnix_response(prompt, sampling_params, ip_ports):

async with aiohttp.ClientSession(timeout=timeout) as session:
async with session.post(f'http://{ip_ports}/generate', json=request) as resp:
- output = await resp.json('text/plain')
+ output = await resp.json()
return output

prompts = [
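A hedged usage sketch of the helper above; the prompt, sampling parameters, and address are illustrative, not values taken from the test suite:

import asyncio

async def main():
    output = await get_llumnix_response(
        "What is the capital of France?",
        {"temperature": 0.0},   # illustrative sampling_params
        "127.0.0.1:8000",       # illustrative ip_ports
    )
    # The /generate payload carries the generated text under
    # 'generated_text' (see the benchmark client above).
    print(output.get("generated_text"))

# asyncio.run(main())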
