From 522108c64cd854f0e5d555f4eb7c761aa9eae377 Mon Sep 17 00:00:00 2001
From: Lyu Han
Date: Tue, 29 Oct 2024 14:40:39 +0800
Subject: [PATCH] Bump version to v0.6.2 (#2659)

* bump version to v0.6.2
* update supported-models list
* update news
* update
* update
* update
* upate
* merge main and fix lint
---
 README.md                                    |  2 +
 README_ja.md                                 |  1 +
 README_zh-CN.md                              |  2 +
 docs/en/get_started/installation.md          |  2 +-
 docs/en/supported_models/supported_models.md | 63 ++++++++++---------
 docs/zh_cn/get_started/installation.md       |  2 +-
 .../supported_models/supported_models.md     | 63 ++++++++++---------
 lmdeploy/version.py                          |  2 +-
 lmdeploy/vl/model/llava_hf.py                |  6 +-
 9 files changed, 76 insertions(+), 67 deletions(-)

diff --git a/README.md b/README.md
index 61c0eba45b..6ca5fadedd 100644
--- a/README.md
+++ b/README.md
@@ -26,6 +26,7 @@ ______________________________________________________________________
 2024
 
+- \[2024/10\] PyTorchEngine supports graph mode on ascend platform, doubling the inference speed
 - \[2024/09\] LMDeploy PyTorchEngine adds support for [Huawei Ascend](./docs/en/get_started/ascend/get_started.md). See supported models [here](docs/en/supported_models/supported_models.md)
 - \[2024/09\] LMDeploy PyTorchEngine achieves 1.3x faster on Llama3-8B inference by introducing CUDA graph
 - \[2024/08\] LMDeploy is integrated into [modelscope/swift](https://github.com/modelscope/swift) as the default accelerator for VLMs inference
@@ -162,6 +163,7 @@ For detailed inference benchmarks in more devices and more settings, please refe
 <li>Phi-3-vision (4.2B)</li>
 <li>Phi-3.5-vision (4.2B)</li>
 <li>GLM-4V (9B)</li>
+<li>Llama3.2-vision (11B, 90B)</li>
diff --git a/README_ja.md b/README_ja.md
index 999ebc9f0b..df4647d868 100644
--- a/README_ja.md
+++ b/README_ja.md
@@ -160,6 +160,7 @@ LMDeploy TurboMindエンジンは卓越した推論能力を持ち、さまざ
 <li>Phi-3-vision (4.2B)</li>
 <li>Phi-3.5-vision (4.2B)</li>
 <li>GLM-4V (9B)</li>
+<li>Llama3.2-vision (11B, 90B)</li>
diff --git a/README_zh-CN.md b/README_zh-CN.md
index f002899c60..663b7b24ab 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -26,6 +26,7 @@ ______________________________________________________________________
 2024
 
+- \[2024/10\] PyTorchEngine 在 ascend 平台上支持了图模式,推理性能提高了 1 倍
 - \[2024/09\] LMDeploy PyTorchEngine 增加了对 [华为 Ascend](docs/zh_cn/get_started/ascend/get_started.md) 的支持。支持的模型请见[这里](docs/zh_cn/supported_models/supported_models.md)
 - \[2024/09\] 通过引入 CUDA Graph,LMDeploy PyTorchEngine 在 Llama3-8B 推理上实现了 1.3 倍的加速
 - \[2024/08\] LMDeploy现已集成至 [modelscope/swift](https://github.com/modelscope/swift),成为 VLMs 推理的默认加速引擎
@@ -163,6 +164,7 @@ LMDeploy TurboMind 引擎拥有卓越的推理能力,在各种规模的模型
 <li>Phi-3-vision (4.2B)</li>
 <li>Phi-3.5-vision (4.2B)</li>
 <li>GLM-4V (9B)</li>
+<li>Llama3.2-vision (11B, 90B)</li>
diff --git a/docs/en/get_started/installation.md b/docs/en/get_started/installation.md
index ab7ee0b30e..b7d03b28a6 100644
--- a/docs/en/get_started/installation.md
+++ b/docs/en/get_started/installation.md
@@ -23,7 +23,7 @@ pip install lmdeploy
 The default prebuilt package is compiled on **CUDA 12**. If CUDA 11+ (>=11.3) is required, you can install lmdeploy by:
 
 ```shell
-export LMDEPLOY_VERSION=0.6.1
+export LMDEPLOY_VERSION=0.6.2
 export PYTHON_VERSION=38
 pip install https://github.com/InternLM/lmdeploy/releases/download/v${LMDEPLOY_VERSION}/lmdeploy-${LMDEPLOY_VERSION}+cu118-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux2014_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu118
 ```
diff --git a/docs/en/supported_models/supported_models.md b/docs/en/supported_models/supported_models.md
index 260120efe0..cd38a60025 100644
--- a/docs/en/supported_models/supported_models.md
+++ b/docs/en/supported_models/supported_models.md
@@ -4,36 +4,37 @@ The following tables detail the models supported by LMDeploy's TurboMind engine
 ## TurboMind on CUDA Platform
 
-| Model | Size | Type | FP16/BF16 | KV INT8 | KV INT4 | W4A16 |
-| :-------------------: | :---------: | :--: | :-------: | :-----: | :-----: | :---: |
-| Llama | 7B - 65B | LLM | Yes | Yes | Yes | Yes |
-| Llama2 | 7B - 70B | LLM | Yes | Yes | Yes | Yes |
-| Llama3 | 8B, 70B | LLM | Yes | Yes | Yes | Yes |
-| Llama3.1 | 8B, 70B | LLM | Yes | Yes | Yes | Yes |
-| Llama3.2 | 3B | LLM | Yes | Yes | Yes | Yes |
-| InternLM | 7B - 20B | LLM | Yes | Yes | Yes | Yes |
-| InternLM2 | 7B - 20B | LLM | Yes | Yes | Yes | Yes |
-| InternLM2.5 | 7B | LLM | Yes | Yes | Yes | Yes |
-| InternLM-XComposer2 | 7B, 4khd-7B | MLLM | Yes | Yes | Yes | Yes |
-| InternLM-XComposer2.5 | 7B | MLLM | Yes | Yes | Yes | Yes |
-| Qwen | 1.8B - 72B | LLM | Yes | Yes | Yes | Yes |
-| Qwen1.5 | 1.8B - 110B | LLM | Yes | Yes | Yes | Yes |
-| Qwen2 | 1.5B - 72B | LLM | Yes | Yes | Yes | Yes |
-| Mistral | 7B | LLM | Yes | Yes | Yes | - |
-| Qwen-VL | 7B | MLLM | Yes | Yes | Yes | Yes |
-| DeepSeek-VL | 7B | MLLM | Yes | Yes | Yes | Yes |
-| Baichuan | 7B | LLM | Yes | Yes | Yes | Yes |
-| Baichuan2 | 7B | LLM | Yes | Yes | Yes | Yes |
-| Code Llama | 7B - 34B | LLM | Yes | Yes | Yes | No |
-| YI | 6B - 34B | LLM | Yes | Yes | Yes | Yes |
-| LLaVA(1.5,1.6) | 7B - 34B | MLLM | Yes | Yes | Yes | Yes |
-| InternVL | v1.1- v1.5 | MLLM | Yes | Yes | Yes | Yes |
-| InternVL2 | 2B-76B | MLLM | Yes | Yes | Yes | Yes |
-| MiniCPM-Llama3-V-2_5 | - | MLLM | Yes | Yes | Yes | Yes |
-| MiniCPM-V-2_6 | - | MLLM | Yes | Yes | Yes | Yes |
-| MiniGeminiLlama | 7B | MLLM | Yes | - | - | Yes |
-| GLM4 | 9B | LLM | Yes | Yes | Yes | Yes |
-| CodeGeeX4 | 9B | LLM | Yes | Yes | Yes | - |
+| Model | Size | Type | FP16/BF16 | KV INT8 | KV INT4 | W4A16 |
+| :-------------------: | :----------: | :--: | :-------: | :-----: | :-----: | :---: |
+| Llama | 7B - 65B | LLM | Yes | Yes | Yes | Yes |
+| Llama2 | 7B - 70B | LLM | Yes | Yes | Yes | Yes |
+| Llama3 | 8B, 70B | LLM | Yes | Yes | Yes | Yes |
+| Llama3.1 | 8B, 70B | LLM | Yes | Yes | Yes | Yes |
+| Llama3.2 | 3B | LLM | Yes | Yes | Yes | Yes |
+| InternLM | 7B - 20B | LLM | Yes | Yes | Yes | Yes |
+| InternLM2 | 7B - 20B | LLM | Yes | Yes | Yes | Yes |
+| InternLM2.5 | 7B | LLM | Yes | Yes | Yes | Yes |
+| InternLM-XComposer2 | 7B, 4khd-7B | MLLM | Yes | Yes | Yes | Yes |
+| InternLM-XComposer2.5 | 7B | MLLM | Yes | Yes | Yes | Yes |
+| Qwen | 1.8B - 72B | LLM | Yes | Yes | Yes | Yes |
+| Qwen1.5 | 1.8B - 110B | LLM | Yes | Yes | Yes | Yes |
+| Qwen2 | 1.5B - 72B | LLM | Yes | Yes | Yes | Yes |
+| Mistral | 7B | LLM | Yes | Yes | Yes | Yes |
+| Mixtral | 8x7B, 8x22B | LLM | Yes | Yes | Yes | Yes |
+| Qwen-VL | 7B | MLLM | Yes | Yes | Yes | Yes |
+| DeepSeek-VL | 7B | MLLM | Yes | Yes | Yes | Yes |
+| Baichuan | 7B | LLM | Yes | Yes | Yes | Yes |
+| Baichuan2 | 7B | LLM | Yes | Yes | Yes | Yes |
+| Code Llama | 7B - 34B | LLM | Yes | Yes | Yes | No |
+| YI | 6B - 34B | LLM | Yes | Yes | Yes | Yes |
+| LLaVA(1.5,1.6) | 7B - 34B | MLLM | Yes | Yes | Yes | Yes |
+| InternVL | v1.1 - v1.5 | MLLM | Yes | Yes | Yes | Yes |
+| InternVL2 | 2B, 8B - 76B | MLLM | Yes | Yes | Yes | Yes |
+| MiniCPM-Llama3-V-2_5 | - | MLLM | Yes | Yes | Yes | Yes |
+| MiniCPM-V-2_6 | - | MLLM | Yes | Yes | Yes | Yes |
+| MiniGeminiLlama | 7B | MLLM | Yes | - | - | Yes |
+| GLM4 | 9B | LLM | Yes | Yes | Yes | Yes |
+| CodeGeeX4 | 9B | LLM | Yes | Yes | Yes | - |
 
 "-" means not verified yet.
 
@@ -60,7 +61,7 @@ The TurboMind engine doesn't support window attention. Therefore, for models tha
 | Falcon | 7B - 180B | LLM | Yes | Yes | Yes | No | No |
 | YI | 6B - 34B | LLM | Yes | Yes | Yes | No | Yes |
 | Mistral | 7B | LLM | Yes | Yes | Yes | No | No |
-| Mixtral | 8x7B | LLM | Yes | Yes | Yes | No | No |
+| Mixtral | 8x7B, 8x22B | LLM | Yes | Yes | Yes | No | No |
 | QWen | 1.8B - 72B | LLM | Yes | Yes | Yes | No | Yes |
 | QWen1.5 | 0.5B - 110B | LLM | Yes | Yes | Yes | No | Yes |
 | QWen1.5-MoE | A2.7B | LLM | Yes | Yes | Yes | No | No |
diff --git a/docs/zh_cn/get_started/installation.md b/docs/zh_cn/get_started/installation.md
index f7eedecfaa..3108d64815 100644
--- a/docs/zh_cn/get_started/installation.md
+++ b/docs/zh_cn/get_started/installation.md
@@ -23,7 +23,7 @@ pip install lmdeploy
 默认的预构建包是在 **CUDA 12** 上编译的。如果需要 CUDA 11+ (>=11.3),你可以使用以下命令安装 lmdeploy:
 
 ```shell
-export LMDEPLOY_VERSION=0.6.1
+export LMDEPLOY_VERSION=0.6.2
 export PYTHON_VERSION=38
 pip install https://github.com/InternLM/lmdeploy/releases/download/v${LMDEPLOY_VERSION}/lmdeploy-${LMDEPLOY_VERSION}+cu118-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux2014_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu118
 ```
diff --git a/docs/zh_cn/supported_models/supported_models.md b/docs/zh_cn/supported_models/supported_models.md
index 26930cf3ce..9bdbf0d45d 100644
--- a/docs/zh_cn/supported_models/supported_models.md
+++ b/docs/zh_cn/supported_models/supported_models.md
@@ -4,36 +4,37 @@
 ## TurboMind CUDA 平台
 
-| Model | Size | Type | FP16/BF16 | KV INT8 | KV INT4 | W4A16 |
-| :-------------------: | :---------: | :--: | :-------: | :-----: | :-----: | :---: |
-| Llama | 7B - 65B | LLM | Yes | Yes | Yes | Yes |
-| Llama2 | 7B - 70B | LLM | Yes | Yes | Yes | Yes |
-| Llama3 | 8B, 70B | LLM | Yes | Yes | Yes | Yes |
-| Llama3.1 | 8B, 70B | LLM | Yes | Yes | Yes | Yes |
-| Llama3.2 | 3B | LLM | Yes | Yes | Yes | Yes |
-| InternLM | 7B - 20B | LLM | Yes | Yes | Yes | Yes |
-| InternLM2 | 7B - 20B | LLM | Yes | Yes | Yes | Yes |
-| InternLM2.5 | 7B | LLM | Yes | Yes | Yes | Yes |
-| InternLM-XComposer2 | 7B, 4khd-7B | MLLM | Yes | Yes | Yes | Yes |
-| InternLM-XComposer2.5 | 7B | MLLM | Yes | Yes | Yes | Yes |
-| Qwen | 1.8B - 72B | LLM | Yes | Yes | Yes | Yes |
-| Qwen1.5 | 1.8B - 110B | LLM | Yes | Yes | Yes | Yes |
-| Qwen2 | 1.5B - 72B | LLM | Yes | Yes | Yes | Yes |
-| Mistral | 7B | LLM | Yes | Yes | Yes | - |
-| Qwen-VL | 7B | MLLM | Yes | Yes | Yes | Yes |
-| DeepSeek-VL | 7B | MLLM | Yes | Yes | Yes | Yes |
-| Baichuan | 7B | LLM | Yes | Yes | Yes | Yes |
-| Baichuan2 | 7B | LLM | Yes | Yes | Yes | Yes |
-| Code Llama | 7B - 34B | LLM | Yes | Yes | Yes | No |
-| YI | 6B - 34B | LLM | Yes | Yes | Yes | Yes |
-| LLaVA(1.5,1.6) | 7B - 34B | MLLM | Yes | Yes | Yes | Yes |
-| InternVL | v1.1- v1.5 | MLLM | Yes | Yes | Yes | Yes |
-| InternVL2 | 2B-76B | MLLM | Yes | Yes | Yes | Yes |
-| MiniCPM-Llama3-V-2_5 | - | MLLM | Yes | Yes | Yes | Yes |
-| MiniCPM-V-2_6 | - | MLLM | Yes | Yes | Yes | Yes |
-| MiniGeminiLlama | 7B | MLLM | Yes | - | - | Yes |
-| GLM4 | 9B | LLM | Yes | Yes | Yes | Yes |
-| CodeGeeX4 | 9B | LLM | Yes | Yes | Yes | - |
+| Model | Size | Type | FP16/BF16 | KV INT8 | KV INT4 | W4A16 |
+| :-------------------: | :----------: | :--: | :-------: | :-----: | :-----: | :---: |
+| Llama | 7B - 65B | LLM | Yes | Yes | Yes | Yes |
+| Llama2 | 7B - 70B | LLM | Yes | Yes | Yes | Yes |
+| Llama3 | 8B, 70B | LLM | Yes | Yes | Yes | Yes |
+| Llama3.1 | 8B, 70B | LLM | Yes | Yes | Yes | Yes |
+| Llama3.2 | 3B | LLM | Yes | Yes | Yes | Yes |
+| InternLM | 7B - 20B | LLM | Yes | Yes | Yes | Yes |
+| InternLM2 | 7B - 20B | LLM | Yes | Yes | Yes | Yes |
+| InternLM2.5 | 7B | LLM | Yes | Yes | Yes | Yes |
+| InternLM-XComposer2 | 7B, 4khd-7B | MLLM | Yes | Yes | Yes | Yes |
+| InternLM-XComposer2.5 | 7B | MLLM | Yes | Yes | Yes | Yes |
+| Qwen | 1.8B - 72B | LLM | Yes | Yes | Yes | Yes |
+| Qwen1.5 | 1.8B - 110B | LLM | Yes | Yes | Yes | Yes |
+| Qwen2 | 1.5B - 72B | LLM | Yes | Yes | Yes | Yes |
+| Mistral | 7B | LLM | Yes | Yes | Yes | Yes |
+| Mixtral | 8x7B, 8x22B | LLM | Yes | Yes | Yes | Yes |
+| Qwen-VL | 7B | MLLM | Yes | Yes | Yes | Yes |
+| DeepSeek-VL | 7B | MLLM | Yes | Yes | Yes | Yes |
+| Baichuan | 7B | LLM | Yes | Yes | Yes | Yes |
+| Baichuan2 | 7B | LLM | Yes | Yes | Yes | Yes |
+| Code Llama | 7B - 34B | LLM | Yes | Yes | Yes | No |
+| YI | 6B - 34B | LLM | Yes | Yes | Yes | Yes |
+| LLaVA(1.5,1.6) | 7B - 34B | MLLM | Yes | Yes | Yes | Yes |
+| InternVL | v1.1 - v1.5 | MLLM | Yes | Yes | Yes | Yes |
+| InternVL2 | 2B, 8B - 76B | MLLM | Yes | Yes | Yes | Yes |
+| MiniCPM-Llama3-V-2_5 | - | MLLM | Yes | Yes | Yes | Yes |
+| MiniCPM-V-2_6 | - | MLLM | Yes | Yes | Yes | Yes |
+| MiniGeminiLlama | 7B | MLLM | Yes | - | - | Yes |
+| GLM4 | 9B | LLM | Yes | Yes | Yes | Yes |
+| CodeGeeX4 | 9B | LLM | Yes | Yes | Yes | - |
 
 “-” 表示还没有验证。
 
@@ -60,7 +61,7 @@ turbomind 引擎不支持 window attention。所以,对于应用了 window att
 | Falcon | 7B - 180B | LLM | Yes | Yes | Yes | No | No |
 | YI | 6B - 34B | LLM | Yes | Yes | Yes | No | Yes |
 | Mistral | 7B | LLM | Yes | Yes | Yes | No | No |
-| Mixtral | 8x7B | LLM | Yes | Yes | Yes | No | No |
+| Mixtral | 8x7B, 8x22B | LLM | Yes | Yes | Yes | No | No |
 | QWen | 1.8B - 72B | LLM | Yes | Yes | Yes | No | Yes |
 | QWen1.5 | 0.5B - 110B | LLM | Yes | Yes | Yes | No | Yes |
 | QWen1.5-MoE | A2.7B | LLM | Yes | Yes | Yes | No | No |
diff --git a/lmdeploy/version.py b/lmdeploy/version.py
index 5237d5f859..b9f76b5761 100644
--- a/lmdeploy/version.py
+++ b/lmdeploy/version.py
@@ -1,7 +1,7 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from typing import Tuple
 
-__version__ = '0.6.1'
+__version__ = '0.6.2'
 short_version = __version__
diff --git a/lmdeploy/vl/model/llava_hf.py b/lmdeploy/vl/model/llava_hf.py
index c2a0e4afa0..31be101ae8 100644
--- a/lmdeploy/vl/model/llava_hf.py
+++ b/lmdeploy/vl/model/llava_hf.py
@@ -32,14 +32,16 @@ def build_model(self):
         self.vl_model = model
 
         # fix for llava-hf/llava-interleave-qwen-7b-hf
-        setattr(model.config, "tie_word_embeddings", False)
+        setattr(model.config, 'tie_word_embeddings', False)
         with disable_logging():
             load_checkpoint_and_dispatch(
                 model=model,
                 max_memory=self.max_memory,
                 checkpoint=self.model_path,
                 device_map='auto' if not self.with_llm else {'': 'cpu'},
-                no_split_module_classes=['CLIPEncoderLayer', 'SiglipEncoderLayer'],
+                no_split_module_classes=[
+                    'CLIPEncoderLayer', 'SiglipEncoderLayer'
+                ],
                 dtype=torch.half)
         model.eval()
         self.model = model
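
A quick way to confirm the bump after installing the 0.6.2 wheel is to read the version string that `lmdeploy/version.py` exposes. The snippet below is an illustrative sketch, not part of the patch; it assumes nothing beyond the `__version__` and `short_version` attributes shown in the diff above.

```python
# Sanity check (illustrative, not part of the patch): verify the installed
# package reports the version bumped in lmdeploy/version.py.
from lmdeploy.version import __version__, short_version

assert __version__ == '0.6.2', f'unexpected lmdeploy version: {__version__}'
print(__version__, short_version)
```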