From 3744fe261f7863ecce297cea638f45a97d2cd0a1 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral
 <8515462+albertvillanova@users.noreply.github.com>
Date: Thu, 21 Nov 2024 11:41:53 +0100
Subject: [PATCH] Update instance type/size in endpoint model_config example
 (#401)

* Update instance type/size in endpoint model_config example

* Remove test file
---
 examples/model_configs/endpoint_model.yaml |  4 ++--
 examples/model_configs/test.yaml           | 22 ----------------------
 2 files changed, 2 insertions(+), 24 deletions(-)
 delete mode 100644 examples/model_configs/test.yaml

diff --git a/examples/model_configs/endpoint_model.yaml b/examples/model_configs/endpoint_model.yaml
index 2834cdd28..4bf2f0602 100644
--- a/examples/model_configs/endpoint_model.yaml
+++ b/examples/model_configs/endpoint_model.yaml
@@ -10,8 +10,8 @@ model:
     accelerator: "gpu"
     region: "eu-west-1"
     vendor: "aws"
-    instance_size: "medium"
-    instance_type: "g5.2xlarge"
+    instance_size: "x1"
+    instance_type: "nvidia-a10g"
     framework: "pytorch"
     endpoint_type: "protected"
     namespace: null # The namespace under which to launch the endopint. Defaults to the current user's namespace
diff --git a/examples/model_configs/test.yaml b/examples/model_configs/test.yaml
deleted file mode 100644
index 9e68c5307..000000000
--- a/examples/model_configs/test.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-model:
-  type: "endpoint"
-  base_params:
-    endpoint_name: "smollm-360m-instruct-v0-2-q8-lvy" # needs to be lower case without special characters
-    model: HuggingFaceTB/SmolLM-360M-Instruct
-    revision: "main"
-    dtype: "default" # can be any of "awq", "eetq", "gptq", "4bit' or "8bit" (will use bitsandbytes), "bfloat16" or "float16"
-    reuse_existing: true # if true, ignore all params in instance, and don't delete the endpoint after evaluation
-  instance:
-    accelerator: "gpu"
-    region: "eu-west-1"
-    vendor: "aws"
-    instance_size: "medium"
-    instance_type: "g5.2xlarge"
-    framework: "pytorch"
-    endpoint_type: "protected"
-    namespace: null # The namespace under which to launch the endopint. Defaults to the current user's namespace
-    image_url: null # Optionally specify the docker image to use when launching the endpoint model. E.g., launching models with later releases of the TGI container with support for newer models.
-    env_vars:
-      null # Optional environment variables to include when launching the endpoint. e.g., `MAX_INPUT_LENGTH: 2048`
-  generation:
-    add_special_tokens: true