From 3744fe261f7863ecce297cea638f45a97d2cd0a1 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 21 Nov 2024 11:41:53 +0100 Subject: [PATCH] Update instance type/size in endpoint model_config example (#401) * Update instance type/size in endpoint model_config example * Remove test file --- examples/model_configs/endpoint_model.yaml | 4 ++-- examples/model_configs/test.yaml | 22 ---------------------- 2 files changed, 2 insertions(+), 24 deletions(-) delete mode 100644 examples/model_configs/test.yaml diff --git a/examples/model_configs/endpoint_model.yaml b/examples/model_configs/endpoint_model.yaml index 2834cdd28..4bf2f0602 100644 --- a/examples/model_configs/endpoint_model.yaml +++ b/examples/model_configs/endpoint_model.yaml @@ -10,8 +10,8 @@ model: accelerator: "gpu" region: "eu-west-1" vendor: "aws" - instance_size: "medium" - instance_type: "g5.2xlarge" + instance_size: "x1" + instance_type: "nvidia-a10g" framework: "pytorch" endpoint_type: "protected" namespace: null # The namespace under which to launch the endopint. Defaults to the current user's namespace diff --git a/examples/model_configs/test.yaml b/examples/model_configs/test.yaml deleted file mode 100644 index 9e68c5307..000000000 --- a/examples/model_configs/test.yaml +++ /dev/null @@ -1,22 +0,0 @@ -model: - type: "endpoint" - base_params: - endpoint_name: "smollm-360m-instruct-v0-2-q8-lvy" # needs to be lower case without special characters - model: HuggingFaceTB/SmolLM-360M-Instruct - revision: "main" - dtype: "default" # can be any of "awq", "eetq", "gptq", "4bit' or "8bit" (will use bitsandbytes), "bfloat16" or "float16" - reuse_existing: true # if true, ignore all params in instance, and don't delete the endpoint after evaluation - instance: - accelerator: "gpu" - region: "eu-west-1" - vendor: "aws" - instance_size: "medium" - instance_type: "g5.2xlarge" - framework: "pytorch" - endpoint_type: "protected" - namespace: null # The namespace under which to launch the endopint. Defaults to the current user's namespace - image_url: null # Optionally specify the docker image to use when launching the endpoint model. E.g., launching models with later releases of the TGI container with support for newer models. - env_vars: - null # Optional environment variables to include when launching the endpoint. e.g., `MAX_INPUT_LENGTH: 2048` - generation: - add_special_tokens: true