TorchServe quick start example #3040

Open · wants to merge 4 commits into base: master
14 changes: 14 additions & 0 deletions README.md
@@ -56,6 +56,20 @@ docker pull pytorch/torchserve-nightly

Refer to [torchserve docker](docker/README.md) for details.

### 🚀 Quick Start Example

```bash
./examples/getting_started/build_image.sh vit  # optionally pass --torch.compile

docker run --rm -it --env TORCH_COMPILE=false --env MODEL_NAME=vit --platform linux/amd64 \
  -p 127.0.0.1:8080:8080 \
  -v /home/ubuntu/serve/model_store:/home/model-server/model-store pytorch/torchserve:demo

# In another terminal, run the following command for inference
curl http://127.0.0.1:8080/predictions/vit -T ./examples/image_classifier/kitten.jpg
```
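
You can verify that the server is up before sending inference requests by hitting TorchServe's health-check endpoint, which is served on the same inference port:

```bash
curl http://127.0.0.1:8080/ping  # returns {"status": "Healthy"} once workers are ready
```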

Refer to [TorchServe Quick Start Example](https://github.com/pytorch/serve/blob/master/examples/getting_started/README.md) for details.

## ⚡ Why TorchServe
* Write once, run anywhere, on-prem, on-cloud, supports inference on CPUs, GPUs, AWS Inf1/Inf2/Trn1, Google Cloud TPUs, [Nvidia MPS](docs/nvidia_mps.md)
* [Model Management API](docs/management_api.md): multi model management with optimized worker to model allocation
31 changes: 31 additions & 0 deletions examples/getting_started/Dockerfile
@@ -0,0 +1,31 @@
ARG BASE_IMAGE=pytorch/torchserve:latest-cpu

FROM $BASE_IMAGE AS server
ARG BASE_IMAGE
ARG EXAMPLE_DIR
ARG HUGGINGFACE_TOKEN
ARG MODEL_NAME
ARG TORCH_COMPILE
ENV MODEL_NAME=$MODEL_NAME
ENV TORCH_COMPILE=$TORCH_COMPILE

USER root

RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
    apt-get update && \
    apt-get install jq wget -y


COPY $EXAMPLE_DIR/requirements.txt /home/model-server/getting_started/requirements.txt
RUN pip install -r /home/model-server/getting_started/requirements.txt

# BERT variants require a Hugging Face login at build time to download the model
RUN \
    if echo "$MODEL_NAME" | grep -iq "bert"; then \
        huggingface-cli login --token $HUGGINGFACE_TOKEN; \
    fi

COPY $EXAMPLE_DIR /home/model-server/getting_started
COPY $EXAMPLE_DIR/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
COPY $EXAMPLE_DIR/config.properties /home/model-server/config.properties

WORKDIR /home/model-server/getting_started
RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh \
    && chown -R model-server /home/model-server

**Review comment (Member):** cool!
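
For reference, a minimal sketch of invoking this Dockerfile directly; `build_image.sh` wraps a command along these lines, though the exact tags and flags it uses may differ (BuildKit is assumed because of the `--mount=type=cache` instruction):

```bash
# Run from the repo root; build args mirror the ARGs declared above
DOCKER_BUILDKIT=1 docker build \
  --build-arg BASE_IMAGE=pytorch/torchserve:latest-cpu \
  --build-arg EXAMPLE_DIR=examples/getting_started \
  --build-arg MODEL_NAME=vit \
  --build-arg HUGGINGFACE_TOKEN=$HUGGINGFACE_TOKEN \
  -t pytorch/torchserve:demo \
  -f examples/getting_started/Dockerfile .
```
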
188 changes: 188 additions & 0 deletions examples/getting_started/Download_Transformer_models.py
@@ -0,0 +1,188 @@
**Review comment (Member):** same point on duplication here

import argparse
import json
import os

import torch
import transformers
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoModelForQuestionAnswering,
    AutoModelForSequenceClassification,
    AutoModelForTokenClassification,
    AutoTokenizer,
    set_seed,
)

print("Transformers version", transformers.__version__)
set_seed(1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def dir_path(path_str):
    if os.path.isdir(path_str):
        return path_str
    else:
        print(f"{path_str} does not exist, creating directory")
        os.makedirs(path_str)
        return path_str


def transformers_model_downloader(
    mode,
    pretrained_model_name,
    num_labels,
    do_lower_case,
    max_length,
    torchscript,
    hardware,
    batch_size,
    model_path,
):
    """Save the checkpoint and config file, along with the tokenizer config and
    vocab files, for a transformer model of your choice.
    """
    print("Download model and tokenizer", pretrained_model_name)
    # load the pre-trained model and tokenizer
    if mode == "sequence_classification":
        config = AutoConfig.from_pretrained(
            pretrained_model_name, num_labels=num_labels, torchscript=torchscript
        )
        model = AutoModelForSequenceClassification.from_pretrained(
            pretrained_model_name, config=config
        )
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name, do_lower_case=do_lower_case
        )
    elif mode == "question_answering":
        config = AutoConfig.from_pretrained(
            pretrained_model_name, torchscript=torchscript
        )
        model = AutoModelForQuestionAnswering.from_pretrained(
            pretrained_model_name, config=config
        )
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name, do_lower_case=do_lower_case
        )
    elif mode == "token_classification":
        config = AutoConfig.from_pretrained(
            pretrained_model_name, num_labels=num_labels, torchscript=torchscript
        )
        model = AutoModelForTokenClassification.from_pretrained(
            pretrained_model_name, config=config
        )
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name, do_lower_case=do_lower_case
        )
    elif mode == "text_generation":
        config = AutoConfig.from_pretrained(
            pretrained_model_name, num_labels=num_labels, torchscript=torchscript
        )
        model = AutoModelForCausalLM.from_pretrained(
            pretrained_model_name, config=config
        )
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name, do_lower_case=do_lower_case
        )

    # NOTE: for demonstration purposes, we do not go through fine-tuning here.
    # A fine-tuning step based on your needs can be added.
    # An example of a fine-tuned model is provided in the README.

    print(
        "Save model and tokenizer/ Torchscript model based on the setting from setup_config",
        pretrained_model_name,
        "in directory",
        model_path,
    )
    # save_mode is read from the config file in the __main__ block below
    if save_mode == "pretrained":
        model.save_pretrained(model_path)
        tokenizer.save_pretrained(model_path)
    elif save_mode == "torchscript":
        dummy_input = "This is a dummy input for torch jit trace"
        inputs = tokenizer.encode_plus(
            dummy_input,
            max_length=int(max_length),
            padding="max_length",
            add_special_tokens=True,
            return_tensors="pt",
        )
        model.to(device).eval()
        if hardware == "neuron":
            import torch_neuron

            input_ids = torch.cat([inputs["input_ids"]] * batch_size, 0).to(device)
            attention_mask = torch.cat([inputs["attention_mask"]] * batch_size, 0).to(
                device
            )
            traced_model = torch_neuron.trace(model, (input_ids, attention_mask))
            torch.jit.save(
                traced_model,
                os.path.join(
                    model_path,
                    "traced_{}_model_neuron_batch_{}.pt".format(
                        pretrained_model_name, batch_size
                    ),
                ),
            )
        elif hardware == "neuronx":
            import torch_neuronx

            input_ids = torch.cat([inputs["input_ids"]] * batch_size, 0).to(device)
            attention_mask = torch.cat([inputs["attention_mask"]] * batch_size, 0).to(
                device
            )
            traced_model = torch_neuronx.trace(model, (input_ids, attention_mask))
            torch.jit.save(
                traced_model,
                os.path.join(
                    model_path,
                    "traced_{}_model_neuronx_batch_{}.pt".format(
                        pretrained_model_name, batch_size
                    ),
                ),
            )
        else:
            input_ids = inputs["input_ids"].to(device)
            attention_mask = inputs["attention_mask"].to(device)
            traced_model = torch.jit.trace(model, (input_ids, attention_mask))
            torch.jit.save(traced_model, os.path.join(model_path, "traced_model.pt"))
    return


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_path",
        "-o",
        type=dir_path,
        default="model",
        help="Output directory for downloaded model files",
    )
    parser.add_argument("--cfg", "-c", type=str, required=True, help="Config")
    args = parser.parse_args()
    with open(args.cfg) as f:
        settings = json.load(f)
    mode = settings["mode"]
    model_name = settings["model_name"]
    num_labels = int(settings["num_labels"])
    do_lower_case = settings["do_lower_case"]
    max_length = settings["max_length"]
    save_mode = settings["save_mode"]
    torchscript = save_mode == "torchscript"
    hardware = settings.get("hardware")
    batch_size = int(settings.get("batch_size", "1"))

    transformers_model_downloader(
        mode,
        model_name,
        num_labels,
        do_lower_case,
        max_length,
        torchscript,
        hardware,
        batch_size,
        args.model_path,
    )
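
For reference, a minimal sketch of driving this script end to end, assuming the packages in `requirements.txt` are installed; the config keys mirror what the `__main__` block reads, while the model name and values are illustrative, not fixed by this PR:

```bash
# Illustrative config; keys match what Download_Transformer_models.py reads
cat > setup_config.json <<'EOF'
{
  "mode": "sequence_classification",
  "model_name": "bert-base-uncased",
  "num_labels": "2",
  "do_lower_case": true,
  "max_length": "150",
  "save_mode": "torchscript",
  "batch_size": "1"
}
EOF

python examples/getting_started/Download_Transformer_models.py \
  --model_path model --cfg setup_config.json
```
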
57 changes: 57 additions & 0 deletions examples/getting_started/README.md
@@ -0,0 +1,57 @@
# TorchServe Quick Start Examples

## Prerequisites

1) Docker, for CPU runs. To make use of an Nvidia GPU, make sure you have nvidia-docker installed.

## Quick Start Example
To quickly get started with TorchServe, run the following commands from the directory where `serve` is cloned.

```bash
./examples/getting_started/build_image.sh vit

docker run --rm -it --env TORCH_COMPILE=false --env MODEL_NAME=vit --platform linux/amd64 \
  -p 127.0.0.1:8080:8080 -p 127.0.0.1:8081:8081 -p 127.0.0.1:8082:8082 \
  -v /home/ubuntu/serve/model_store_1:/home/model-server/model-store pytorch/torchserve:demo
```

You can point `/home/ubuntu/serve/model_store_1` at any volume where you want the model archives to be stored.

In another terminal, run the following command for inference:
```bash
curl http://127.0.0.1:8080/predictions/vit -T ./examples/image_classifier/kitten.jpg
```
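
The `docker run` command above also maps TorchServe's management (8081) and metrics (8082) ports. Metrics are exposed in Prometheus format:

```bash
curl http://127.0.0.1:8082/metrics
```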

### Supported models

The following models are supported in this example:
```
resnet, densenet, vit, fasterrcnn, bertsc, berttc, bertqa, berttg
```

The BERT examples use Hugging Face models, so you need to set `HUGGINGFACE_TOKEN`:

```bash
export HUGGINGFACE_TOKEN=<your token>
```
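
For example, to build an image for the sequence-classification BERT variant from the list above (with the token set):

```bash
./examples/getting_started/build_image.sh bertsc
```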

### `torch.compile`

To enable `torch.compile` with these models, pass the optional `--torch.compile` argument:

```bash
./examples/getting_started/build_image.sh resnet --torch.compile
```
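
When you start a container from an image built this way, set the matching runtime flag; this mirrors the `docker run` command above with `TORCH_COMPILE=true` and the corresponding model name:

```bash
docker run --rm -it --env TORCH_COMPILE=true --env MODEL_NAME=resnet --platform linux/amd64 \
  -p 127.0.0.1:8080:8080 -p 127.0.0.1:8081:8081 -p 127.0.0.1:8082:8082 \
  -v /home/ubuntu/serve/model_store_1:/home/model-server/model-store pytorch/torchserve:demo
```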

## Register multiple models

TorchServe supports multi-model endpoints out of the box. Once you have loaded a model, you can register it along with any other model using TorchServe's management API.
Depending on the amount of memory (or GPU memory) available on your machine, you can load as many models as you need.

```bash
curl -X POST "127.0.0.1:8081/models?model_name=resnet&url=/home/ubuntu/serve/model_store_1/resnet"
```
You can check all the loaded models using:
```bash
curl -X GET "127.0.0.1:8081/models"
```

For other management APIs, refer to the [management API documentation](https://github.com/pytorch/serve/blob/master/docs/management_api.md).
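
For instance, a model registered above can be unregistered through the same API:

```bash
curl -X DELETE "127.0.0.1:8081/models/resnet"
```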