Skip to content

[Text Generation][V2] NonKVCachePipeline #1417

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 25 additions & 24 deletions src/deepsparse/transformers/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,7 @@
from onnx import ModelProto

from deepsparse.log import get_main_logger
from deepsparse.utils.onnx import (
_MODEL_DIR_ONNX_NAME,
model_to_path,
truncate_onnx_model,
)
from deepsparse.utils.onnx import MODEL_ONNX_NAME, model_to_path, truncate_onnx_model
from sparsezoo.utils import save_onnx


Expand All @@ -55,6 +51,7 @@ def setup_transformers_pipeline(
sequence_length: int,
tokenizer_padding_side: str = "left",
engine_kwargs: Optional[Dict] = None,
onnx_model_name: Optional[str] = None,
) -> Tuple[
str, transformers.PretrainedConfig, transformers.PreTrainedTokenizer, Dict[str, Any]
]:
Expand All @@ -66,48 +63,46 @@ def setup_transformers_pipeline(
:param tokenizer_padding_side: The side to pad on for the tokenizer,
either "left" or "right"
:param engine_kwargs: The kwargs to pass to the engine
:param onnx_model_name: The name of the onnx model to be loaded.
If not specified, defaults are used (see setup_onnx_file_path)
:return The model path, config, tokenizer, and engine kwargs
"""
model_path, config, tokenizer = fetch_onnx_file_path(model_path, sequence_length)
model_path, config, tokenizer = setup_onnx_file_path(
model_path, sequence_length, onnx_model_name
)

tokenizer.padding_side = tokenizer_padding_side
if not tokenizer.pad_token:
tokenizer.pad_token = tokenizer.eos_token

engine_kwargs = engine_kwargs or {}
if engine_kwargs.get("model_path"):
raise ValueError(
"The engine kwargs already specify "
f"a model path: {engine_kwargs['model_path']}, "
f"but a model path was also provided: {model_path}. "
"Please only provide one."
)
engine_kwargs["model_path"] = model_path
return model_path, config, tokenizer, engine_kwargs


def fetch_onnx_file_path(
def setup_onnx_file_path(
model_path: str,
sequence_length: int,
task: Optional[str] = None,
onnx_model_name: Optional[str] = None,
) -> Tuple[str, transformers.PretrainedConfig, transformers.PreTrainedTokenizer]:
"""
Parses ONNX model from the `model_path` provided. It additionally
creates config and tokenizer objects from the `deployment path`,
derived from the `model_path` provided.
:param model_path: path to the model to be parsed
:param sequence_length: maximum sequence length of the model
:param onnx_model_name: optionally, the precise name of the ONNX model
of interest may be specified. If not specified, the default ONNX model
name will be used (refer to `get_deployment_path` for details)
:return: file path to the processed ONNX file for the engine to compile
"""
deployment_path, onnx_path = get_deployment_path(model_path)
deployment_path, onnx_path = get_deployment_path(model_path, onnx_model_name)

hf_logger = logging.getLogger("transformers")
hf_logger_level = hf_logger.level
hf_logger.setLevel(logging.ERROR)

config = transformers.PretrainedConfig.from_pretrained(
deployment_path, finetuning_task=task
)
config = transformers.PretrainedConfig.from_pretrained(deployment_path)
hf_logger.setLevel(hf_logger_level)

trust_remote_code = False
Expand All @@ -126,7 +121,9 @@ def fetch_onnx_file_path(
return onnx_path, config, tokenizer


def get_deployment_path(model_path: str) -> Tuple[str, str]:
def get_deployment_path(
model_path: str, onnx_model_name: Optional[str] = None
) -> Tuple[str, str]:
"""
Returns the path to the deployment directory
for the given model path and the path to the mandatory
Expand All @@ -135,23 +132,27 @@ def get_deployment_path(model_path: str) -> Tuple[str, str]:
for running the transformers model in the deepsparse pipeline

:param model_path: path to model directory, sparsezoo stub, or ONNX file
:param onnx_model_name: optionally, the precise name of the ONNX model
of interest may be specified. If not specified, the default ONNX model
name will be used.
:return: path to the deployment directory and path to the ONNX file inside
the deployment directory
"""
onnx_model_name = onnx_model_name or MODEL_ONNX_NAME
if os.path.isfile(model_path):
# return the parent directory of the ONNX file
return os.path.dirname(model_path), model_path

if os.path.isdir(model_path):
model_files = os.listdir(model_path)

if _MODEL_DIR_ONNX_NAME not in model_files:
if onnx_model_name not in model_files:
raise ValueError(
f"{_MODEL_DIR_ONNX_NAME} not found in transformers model directory "
f"{onnx_model_name} not found in transformers model directory "
f"{model_path}. Be sure that an export of the model is written to "
f"{os.path.join(model_path, _MODEL_DIR_ONNX_NAME)}"
f"{os.path.join(model_path, onnx_model_name)}"
)
return model_path, os.path.join(model_path, _MODEL_DIR_ONNX_NAME)
return model_path, os.path.join(model_path, onnx_model_name)

elif model_path.startswith("zoo:") or model_path.startswith("hf:"):
onnx_model_path = model_to_path(model_path)
Expand Down
12 changes: 6 additions & 6 deletions src/deepsparse/utils/onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@
"has_model_kv_cache",
"CACHE_INPUT_PREFIX",
"CACHE_OUTPUT_PREFIX",
"_MODEL_DIR_ONNX_NAME",
"MODEL_ONNX_NAME",
]

_LOGGER = logging.getLogger(__name__)

_MODEL_DIR_ONNX_NAME = "model.onnx"
MODEL_ONNX_NAME = "model.onnx"
CACHE_INPUT_PREFIX = "past_key_values"
CACHE_OUTPUT_PREFIX = "present"

Expand Down Expand Up @@ -132,7 +132,7 @@ def model_to_path(model: Union[str, Model, File]) -> str:
model.deployment.path

# default to the main onnx file for the model
model = model.deployment.get_file(_MODEL_DIR_ONNX_NAME).path
model = model.deployment.get_file(MODEL_ONNX_NAME).path

elif File is not object and isinstance(model, File):
# get the downloaded_path -- will auto download if not on local system
Expand All @@ -143,10 +143,10 @@ def model_to_path(model: Union[str, Model, File]) -> str:
from huggingface_hub import snapshot_download

deployment_path = snapshot_download(repo_id=model.replace("hf:", "", 1))
onnx_path = os.path.join(deployment_path, _MODEL_DIR_ONNX_NAME)
onnx_path = os.path.join(deployment_path, MODEL_ONNX_NAME)
if not os.path.isfile(onnx_path):
raise ValueError(
f"Could not find the ONNX model file '{_MODEL_DIR_ONNX_NAME}' in the "
f"Could not find the ONNX model file '{MODEL_ONNX_NAME}' in the "
f"Hugging Face Hub repository located at {deployment_path}. Please "
f"ensure the model has been correctly exported to ONNX format and "
f"exists in the repository."
Expand All @@ -161,7 +161,7 @@ def model_to_path(model: Union[str, Model, File]) -> str:

model_path = Path(model)
if model_path.is_dir():
return str(model_path / _MODEL_DIR_ONNX_NAME)
return str(model_path / MODEL_ONNX_NAME)

return model

Expand Down
21 changes: 21 additions & 0 deletions src/deepsparse/v2/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# flake8: noqa

# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .operators import *
from .pipeline import *
from .routers import *
from .schedulers import *
from .utils import *
20 changes: 20 additions & 0 deletions src/deepsparse/v2/image_classification/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# flake8: noqa
from .postprocess_operator import *
from .preprocess_operator import *


from .pipeline import * # isort:skip
62 changes: 62 additions & 0 deletions src/deepsparse/v2/image_classification/pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import warnings
from typing import Dict, Optional, Tuple, Union

from deepsparse.v2.image_classification.postprocess_operator import (
ImageClassificationPostProcess,
)
from deepsparse.v2.image_classification.preprocess_operator import (
ImageClassificationPreProcess,
)
from deepsparse.v2.operators.engine_operator import EngineOperator
from deepsparse.v2.pipeline import Pipeline
from deepsparse.v2.routers.router import LinearRouter
from deepsparse.v2.schedulers.scheduler import OperatorScheduler


_LOGGER = logging.getLogger(__name__)

__all__ = ["ImageClassificationPipeline"]


class ImageClassificationPipeline(Pipeline):
    """
    Image classification pipeline for the deepsparse v2 framework.

    Composes three operators — preprocessing, engine inference, and
    postprocessing — behind a linear router and a single operator scheduler.

    :param model_path: path to the model (or deployment directory) the engine
        should compile
    :param engine_kwargs: optional kwargs forwarded to ``EngineOperator``. The
        ``"model_path"`` entry is always set to ``model_path``; if the caller
        supplied a conflicting value, a warning is emitted and it is replaced
    :param class_names: optional mapping from class index to label, either as
        a dict or a path to a ``.json`` file (forwarded to postprocessing)
    :param image_size: optional image size override for preprocessing;
        if not given, preprocessing derives it from the model
    :param top_k: number of top predictions to return per image
    """

    def __init__(
        self,
        model_path: str,
        engine_kwargs: Optional[Dict] = None,
        class_names: Union[None, str, Dict[str, str]] = None,
        image_size: Optional[Tuple[int]] = None,
        top_k: int = 1,
    ):
        if not engine_kwargs:
            engine_kwargs = {}
        elif engine_kwargs.get("model_path") != model_path:
            warnings.warn(f"Updating engine_kwargs to include {model_path}")
        # BUGFIX: previously the mismatch branch only warned without updating,
        # so the engine could be built with a missing/stale model path. Always
        # make engine_kwargs consistent with the explicit model_path argument.
        engine_kwargs["model_path"] = model_path

        engine = EngineOperator(**engine_kwargs)
        preprocess = ImageClassificationPreProcess(
            model_path=engine.model_path, image_size=image_size
        )
        postprocess = ImageClassificationPostProcess(
            top_k=top_k, class_names=class_names
        )

        ops = [preprocess, engine, postprocess]
        router = LinearRouter(end_route=len(ops))
        scheduler = [OperatorScheduler()]
        super().__init__(ops=ops, router=router, schedulers=scheduler)
81 changes: 81 additions & 0 deletions src/deepsparse/v2/image_classification/postprocess_operator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
from typing import Dict, List, Union

import numpy
from pydantic import BaseModel, Field

from deepsparse.v2.operators import Operator


class ImageClassificationOutput(BaseModel):
    """
    Output model for image classification

    Produced by the post-processing operator: ``labels`` and ``scores`` are
    aligned, one entry per prediction. Entries are scalars for a single
    top-1 prediction and lists when multiple values are returned per image.
    """

    # Labels may be ints (raw class indices) or strs (after an optional
    # class-name mapping is applied), possibly nested per image.
    labels: List[Union[int, str, List[int], List[str]]] = Field(
        description="List of labels, one for each prediction"
    )
    # Confidence scores aligned with `labels`, nested the same way.
    scores: List[Union[float, List[float]]] = Field(
        description="List of scores, one for each prediction"
    )


__all__ = ["ImageClassificationPostProcess"]


class ImageClassificationPostProcess(Operator):
    """
    Image Classification post-processing Operator. This Operator is responsible
    for processing outputs from the engine and returning the classification
    results to the user, using the ImageClassificationOutput structure.

    :param top_k: number of top predictions (by score) to keep per image
    :param class_names: optional mapping from class index to label, given
        either as a dict or as a path to a ``.json`` file containing one
    """

    input_schema = None
    output_schema = ImageClassificationOutput

    def __init__(
        self, top_k: int = 1, class_names: Union[None, str, Dict[str, str]] = None
    ):
        self.top_k = top_k
        if isinstance(class_names, str) and class_names.endswith(".json"):
            # use a context manager so the file handle is not leaked
            with open(class_names) as class_names_file:
                self._class_names = json.load(class_names_file)
        elif isinstance(class_names, dict):
            self._class_names = class_names
        else:
            self._class_names = None

    def run(self, inp: "EngineOperatorOutputs", **kwargs) -> Dict:  # noqa: F821
        """
        :param inp: engine outputs; the first output array is expected to hold
            one row of class scores per image in the batch
        :return: dict with "labels" and "scores" fields matching
            ImageClassificationOutput
        """
        labels, scores = [], []
        for prediction_batch in inp.engine_outputs[0]:
            # indices of the top_k highest scores, in descending score order
            label = (-prediction_batch).argsort()[: self.top_k]
            score = prediction_batch[label]
            labels.append(label)
            scores.append(score.tolist())

        # guard against an empty batch: vectorize and labels[0] both
        # fail on empty input
        if labels:
            if self._class_names is not None:
                labels = numpy.vectorize(self._class_names.__getitem__)(labels)
                labels = labels.tolist()

            if isinstance(labels[0], numpy.ndarray):
                labels = [label.tolist() for label in labels]

            # flatten single-image batches so callers receive scalars/lists
            # rather than singly-nested lists
            if len(labels) == 1:
                labels = labels[0]
                scores = scores[0]

        return {"scores": scores, "labels": labels}
Loading