Skip to content

Commit

Permalink
2
Browse files Browse the repository at this point in the history
  • Loading branch information
ilya-lavrenov committed Jun 10, 2024
1 parent 178e52d commit f72b8ce
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 14 deletions.
20 changes: 7 additions & 13 deletions vllm/attention/backends/openvino.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import dataclass
-from typing import List, Optional, Tuple
+from typing import List, Tuple

import openvino as ov
import torch
@@ -21,7 +21,11 @@ def get_impl_cls():
raise NotImplementedError

@staticmethod
-def make_metadata(*args, **kwargs) -> "OpenVINOAttentionMetadata":
+def make_metadata(*args, **kwargs) -> "AttentionMetadata":
+raise NotImplementedError
+
+@staticmethod
+def make_openvino_metadata(*args, **kwargs) -> "OpenVINOAttentionMetadata":
return OpenVINOAttentionMetadata(*args, **kwargs)

@staticmethod
@@ -55,21 +59,11 @@ def copy_blocks(


@dataclass
-class OpenVINOAttentionMetadata(AttentionMetadata):
+class OpenVINOAttentionMetadata:
"""Metadata for OpenVINOAttentionBackend.
"""
past_lens: torch.Tensor
subsequence_begins: torch.Tensor
block_indices: torch.Tensor
block_indices_begins: torch.Tensor
max_context_len: torch.Tensor
-
-@property
-def prefill_metadata(self) -> Optional["AttentionMetadata"]:
-# OpenVINO uses its own metadata format
-raise NotImplementedError
-
-@property
-def decode_metadata(self) -> Optional["AttentionMetadata"]:
-# OpenVINO uses its own metadata format
-raise NotImplementedError
2 changes: 1 addition & 1 deletion vllm/worker/openvino_model_runner.py
Original file line number Diff line number Diff line change
@@ -254,7 +254,7 @@ def _prepare_model_input(
max_context_len, dtype=torch.int32,
device=self.device) # type: ignore

-attn_metadata = self.attn_backend.make_metadata(
+attn_metadata = self.attn_backend.make_openvino_metadata(
past_lens=past_lens_tensor,
subsequence_begins=subsequence_begins_tensor,
block_indices=block_indices_tensor,
Expand Down

0 comments on commit f72b8ce

Please sign in to comment.