From 991c004deba64369507c569dee834bb11d043960 Mon Sep 17 00:00:00 2001 From: Jerzy Zagorski Date: Mon, 9 Dec 2024 10:54:13 +0000 Subject: [PATCH] Upgrade neuron to 2.20.2 This change also fixes loading of Neuron models. Neuron uses an older PyTorch; before this change I was getting TypeError: infer_schema() takes 1 positional argument but 2 were given, which was caused by direct custom_op registration. Direct registration is now skipped when custom_op is not supported. --- Dockerfile.neuron | 3 ++- vllm/utils.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile.neuron b/Dockerfile.neuron index 76dbd4c04d3f3..77162bc82de62 100644 --- a/Dockerfile.neuron +++ b/Dockerfile.neuron @@ -1,5 +1,6 @@ # default base image -ARG BASE_IMAGE="public.ecr.aws/neuron/pytorch-inference-neuronx:2.1.2-neuronx-py310-sdk2.20.0-ubuntu20.04" +# https://gallery.ecr.aws/neuron/pytorch-inference-neuronx +ARG BASE_IMAGE="public.ecr.aws/neuron/pytorch-inference-neuronx:2.1.2-neuronx-py310-sdk2.20.2-ubuntu20.04" FROM $BASE_IMAGE diff --git a/vllm/utils.py b/vllm/utils.py index 1f19d9eacd16d..2bb1fb2af40f4 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -1628,7 +1628,7 @@ def direct_register_custom_op( library object. If you want to bind the operator to a different library, make sure the library object is alive when the operator is used. """ - if is_in_doc_build(): + if is_in_doc_build() or not supports_custom_op(): return import torch.library if hasattr(torch.library, "infer_schema"):