diff --git a/vllm/model_executor/models/gpt_bigcode.py b/vllm/model_executor/models/gpt_bigcode.py
index 6c4a04667c5da..24c79a8855475 100644
--- a/vllm/model_executor/models/gpt_bigcode.py
+++ b/vllm/model_executor/models/gpt_bigcode.py
@@ -25,6 +25,7 @@
 from transformers import GPTBigCodeConfig
 
 from vllm.attention import Attention, AttentionMetadata
+from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig, LoRAConfig
 from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.activation import get_act_fn
@@ -187,6 +188,7 @@ def forward(
         return hidden_states
 
 
+@support_torch_compile
 class GPTBigCodeModel(nn.Module):
 
     def __init__(
diff --git a/vllm/model_executor/models/gpt_j.py b/vllm/model_executor/models/gpt_j.py
index d40bf8c88ee19..0451d16b6c738 100644
--- a/vllm/model_executor/models/gpt_j.py
+++ b/vllm/model_executor/models/gpt_j.py
@@ -23,6 +23,7 @@
 from transformers import GPTJConfig
 
 from vllm.attention import Attention, AttentionMetadata
+from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig
 from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.activation import get_act_fn
@@ -174,6 +175,7 @@ def forward(
         return hidden_states
 
 
+@support_torch_compile
 class GPTJModel(nn.Module):
 
     def __init__(
diff --git a/vllm/model_executor/models/gpt_neox.py b/vllm/model_executor/models/gpt_neox.py
index 23a1ca06cc69e..1bccef7a5f173 100644
--- a/vllm/model_executor/models/gpt_neox.py
+++ b/vllm/model_executor/models/gpt_neox.py
@@ -23,6 +23,7 @@
 from transformers import GPTNeoXConfig
 
 from vllm.attention import Attention, AttentionMetadata
+from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig
 from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.activation import get_act_fn
@@ -187,6 +188,7 @@ def forward(
         return hidden_states
 
 
+@support_torch_compile
 class GPTNeoXModel(nn.Module):
 
     def __init__(
diff --git a/vllm/model_executor/models/granite.py b/vllm/model_executor/models/granite.py
index dcf4f5b27704a..5a397ed8ff6a0 100644
--- a/vllm/model_executor/models/granite.py
+++ b/vllm/model_executor/models/granite.py
@@ -28,6 +28,7 @@
 from transformers import GraniteConfig
 
 from vllm.attention import Attention, AttentionMetadata
+from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig, LoRAConfig
 from vllm.distributed import (get_pp_group, get_tensor_model_parallel_rank,
                               get_tensor_model_parallel_world_size)
@@ -254,6 +255,7 @@ def forward(
         return hidden_states
 
 
+@support_torch_compile
 class GraniteModel(nn.Module):
 
     def __init__(
diff --git a/vllm/model_executor/models/internlm2.py b/vllm/model_executor/models/internlm2.py
index f6cde44e9d83d..9a77e48626ca5 100644
--- a/vllm/model_executor/models/internlm2.py
+++ b/vllm/model_executor/models/internlm2.py
@@ -7,6 +7,7 @@
 from transformers import PretrainedConfig
 
 from vllm.attention import Attention, AttentionMetadata
+from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig
 from vllm.distributed import (get_pp_group, get_tensor_model_parallel_rank,
                               get_tensor_model_parallel_world_size,
@@ -230,6 +231,7 @@ def forward(
         return hidden_states, residual
 
 
+@support_torch_compile
 class InternLM2Model(nn.Module):
 
     def __init__(