From 30e0be83a7cabdf4e517778d7d04098a13d34537 Mon Sep 17 00:00:00 2001
From: Gautier Solard <104368859+gsolard@users.noreply.github.com>
Date: Wed, 3 Jul 2024 17:13:50 +0200
Subject: [PATCH] Added changes following vllm update (0.5.0.post1) (#4)

* Added changes following vllm update

* Updated logo url

* Properly fixed the logo url

* Revert changes for the logo. The github cache was not updated

* Added a missing argument
---
 Dockerfile                   | 2 +-
 README.md                    | 2 +-
 pyproject.toml               | 4 ++--
 requirements.txt             | 4 ++--
 src/happy_vllm/utils_args.py | 7 +++++++
 5 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 5e51091..b429158 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -24,7 +24,7 @@ ENV VIRTUAL_ENV="/opt/venv" PATH="/opt/venv/bin:${PATH}"
 WORKDIR /app
 
 # Install package
-COPY pyproject.toml setup.py README.md requirements.txt version.txt /app
+COPY pyproject.toml setup.py README.md requirements.txt version.txt /app/
 COPY src/happy_vllm /app/src/happy_vllm
 RUN python -m pip install -r requirements.txt && python -m pip install .
 
diff --git a/README.md b/README.md
index 3202969..1a270bc 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ pip install -e .
 Just use the entrypoint `happy-vllm` (see [arguments](https://france-travail.github.io/happy_vllm/arguments/) for a list of all possible arguments)
 
 ```bash
-happy_vllm --model path_to_model --host 127.0.0.1 --port 5000 --model-name my_model
+happy-vllm --model path_to_model --host 127.0.0.1 --port 5000 --model-name my_model
 ```
 
 It will launch the API and you can directly query it for example with
diff --git a/pyproject.toml b/pyproject.toml
index 5656a5a..0e9eaa1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,9 +13,9 @@ license = {file="LICENSE"}
 readme = "README.md"
 requires-python = ">=3.10,<4.0"
 dependencies = [
-    "vllm>=0.4.3,<1.0",
+    "vllm>=0.5.0.post1,<1.0",
     "fastapi>=0.111.0,<1.0",
-    "pydantic_settings>=2.2.1,<3.0",
+    "pydantic_settings>=2.3.4,<3.0",
     "uvicorn[standard]>=0.30.1,<1.0",
     "prometheus_client>=0.20.0,<1.0",
     "numpy>=1.26.4",
diff --git a/requirements.txt b/requirements.txt
index 02d53e5..d38840a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
-vllm==0.4.3
+vllm==0.5.0.post1
 fastapi==0.111.0
-pydantic-settings==2.2.1
+pydantic-settings==2.3.4
 uvicorn[standard]==0.30.1
 prometheus_client==0.20.0
 numpy==1.26.4
diff --git a/src/happy_vllm/utils_args.py b/src/happy_vllm/utils_args.py
index 590730d..9aca817 100644
--- a/src/happy_vllm/utils_args.py
+++ b/src/happy_vllm/utils_args.py
@@ -126,6 +126,7 @@ class ModelSettings(BaseSettings):
     revision: Optional[str] = default_args.revision
     code_revision: Optional[str] = default_args.code_revision
     rope_scaling: Optional[dict] = default_args.rope_scaling
+    rope_theta: Optional[float] = None
     tokenizer_revision: Optional[str] = default_args.tokenizer_revision
     quantization: Optional[str] = default_args.quantization
     enforce_eager: bool = False
@@ -145,6 +146,7 @@ class ModelSettings(BaseSettings):
     num_gpu_blocks_override: Optional[int] = default_args.num_gpu_blocks_override
     num_lookahead_slots: int = default_args.num_lookahead_slots
     model_loader_extra_config: Optional[dict] = default_args.model_loader_extra_config
+    preemption_mode: Optional[str] = None
     max_log_len: Optional[int] = default_args.max_log_len
     disable_log_requests: bool = False
     engine_use_ray: bool = False
@@ -154,8 +156,12 @@ class ModelSettings(BaseSettings):
     tokenizer_pool_type: str = default_args.tokenizer_pool_type
     tokenizer_pool_extra_config: Optional[str] = default_args.tokenizer_pool_extra_config
     image_input_type: Optional[str] = default_args.image_input_type
+    image_token_id: Optional[int] = None
     image_input_shape: Optional[str] = default_args.image_input_shape
     image_feature_size: Optional[int] = default_args.image_feature_size
+    image_processor: Optional[str] = None
+    image_processor_revision: Optional[str] = None
+    disable_image_processor: bool = False
     scheduler_delay_factor: float = default_args.scheduler_delay_factor
     enable_chunked_prefill: bool = False
     guided_decoding_backend: str = default_args.guided_decoding_backend
@@ -166,6 +172,7 @@ class ModelSettings(BaseSettings):
     speculative_disable_by_batch_size: Optional[int] = default_args.speculative_disable_by_batch_size
     ngram_prompt_lookup_max: Optional[int] = default_args.ngram_prompt_lookup_max
     ngram_prompt_lookup_min: Optional[int] = default_args.ngram_prompt_lookup_min
+    qlora_adapter_name_or_path: Optional[str] = None
 
     model_config = SettingsConfigDict(env_file=".env", extra='ignore', protected_namespaces=('settings', ))
 
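Note on the `utils_args.py` changes: `ModelSettings` is a pydantic-settings `BaseSettings`, so the fields added in this patch, like the existing ones, can presumably be supplied via environment variables or the `.env` file named in `model_config`. Below is a minimal sketch under that assumption; the trimmed-down class (only three of the new fields) and the `"swap"` value are illustrative, not the full happy_vllm definition.

```python
# Minimal sketch, not the actual happy_vllm module: a trimmed-down
# ModelSettings showing how fields added in this patch would be picked up
# from environment variables (or a .env file) by pydantic-settings 2.x.
from typing import Optional

from pydantic_settings import BaseSettings, SettingsConfigDict


class ModelSettings(BaseSettings):
    # Fields introduced by this patch, with the same defaults as the diff
    rope_theta: Optional[float] = None
    preemption_mode: Optional[str] = None
    qlora_adapter_name_or_path: Optional[str] = None

    model_config = SettingsConfigDict(env_file=".env", extra='ignore',
                                      protected_namespaces=('settings', ))


if __name__ == "__main__":
    import os

    # Environment variable names match field names case-insensitively.
    os.environ["PREEMPTION_MODE"] = "swap"  # illustrative value
    print(ModelSettings().preemption_mode)  # -> swap
```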