Commit e5ac4b9

Merge branch 'main' into iputterman/fixed-schedule
lkomali authored Nov 6, 2024
2 parents 7282e1b + 097ca8f commit e5ac4b9
Showing 30 changed files with 195 additions and 64 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/trigger_ci.yml
@@ -20,5 +20,5 @@ jobs:
           curl --fail --request POST \
             --form token=${{ secrets.PIPELINE_TOKEN }} \
             --form variables[TRITON_PERF_ANALYZER_REPO_TAG]=${GITHUB_HEAD_REF} \
-            --form variables[TRITON_CLIENT_REPO_TAG]=main \
+            --form variables[TRITON_CLIENT_REPO_TAG]=${GITHUB_HEAD_REF} \
             -F ref=${GITHUB_HEAD_REF} "${{ secrets.PIPELINE_URL }}"
6 changes: 4 additions & 2 deletions genai-perf/README.md
@@ -74,7 +74,7 @@ The easiest way to install GenAI-Perf is through
 Install the latest release using the following command:
 
 ```bash
-export RELEASE="24.09"
+export RELEASE="24.10"
 
 docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk
 
@@ -136,7 +136,7 @@ docker run -ti \
     --shm-size=1g --ulimit memlock=-1 \
     -v /tmp:/tmp \
     -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-    nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3
+    nvcr.io/nvidia/tritonserver:24.10-trtllm-python-py3
 
 # Install the Triton CLI
 pip install git+https://github.com/triton-inference-server/[email protected]
@@ -431,6 +431,7 @@ in JSONL format. Example: {\"text\": \"Your prompt here\"}"
 The number of unique prompts to generate as stimulus. (default: `100`)
 
 ##### `--output-tokens-mean <int>`
+##### `--osl`
 
 The mean number of tokens in each output. Ensure the `--tokenizer` value is set
 correctly. (default: `-1`)
@@ -454,6 +455,7 @@ when `--output-tokens-mean` is provided. (default: `0`)
 The seed used to generate random values. (default: `0`)
 
 ##### `--synthetic-input-tokens-mean <int>`
+##### `--isl`
 
 The mean of number of tokens in the generated prompts when using synthetic
 data. (default: `550`)
6 changes: 3 additions & 3 deletions genai-perf/docs/lora.md
@@ -90,7 +90,7 @@ docker run -it --net=host --rm --gpus=all \
 Run GenAI-Perf from the Triton Inference Server SDK container:
 
 ```bash
-export RELEASE="24.09"
+export RELEASE="24.10"
 
 docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk
 
@@ -149,7 +149,7 @@ docker run \
 Run GenAI-Perf from the Triton Inference Server SDK container:
 
 ```bash
-export RELEASE="24.09"
+export RELEASE="24.10"
 
 docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk
 
@@ -207,7 +207,7 @@ docker run \
 Run GenAI-Perf from the Triton Inference Server SDK container:
 
 ```bash
-export RELEASE="24.09"
+export RELEASE="24.10"
 
 docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk
 
2 changes: 1 addition & 1 deletion genai-perf/genai_perf/__init__.py
@@ -24,4 +24,4 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-__version__ = "0.0.7dev"
+__version__ = "0.0.8dev"
@@ -24,7 +24,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import random
 from typing import Any, Dict, List, Union
 
 from genai_perf.exceptions import GenAIPerfException
@@ -36,6 +35,7 @@
 )
 from genai_perf.inputs.inputs_config import InputsConfig
 from genai_perf.inputs.retrievers.generic_dataset import DataRow, GenericDataset
+from genai_perf.utils import sample_bounded_normal
 
 
 class OpenAIChatCompletionsConverter(BaseConverter):
@@ -132,7 +132,11 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:
             payload["stream"] = True
         if config.output_tokens_mean != DEFAULT_OUTPUT_TOKENS_MEAN:
             payload["max_tokens"] = int(
-                random.gauss(config.output_tokens_mean, config.output_tokens_stddev)
+                sample_bounded_normal(
+                    mean=config.output_tokens_mean,
+                    stddev=config.output_tokens_stddev,
+                    lower=1,  # output token must be >= 1
+                )
             )
         for key, value in config.extra_inputs.items():
             payload[key] = value
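The change above repeats across every converter below: output-token budgets were previously drawn with `random.gauss`, which can return zero or a negative number, and are now drawn through the shared `sample_bounded_normal` helper (added to `genai_perf/utils.py` at the end of this diff), which clamps the draw at 1. A minimal before/after sketch, assuming the `genai_perf` package is importable; the mean/stddev values are illustrative:

```python
import random

from genai_perf.utils import sample_bounded_normal

random.seed(0)

# Old behavior: an unlucky draw can yield a zero or negative token budget.
old_max_tokens = int(random.gauss(10, 20))

# New behavior: the same draw is clamped, so the budget is always >= 1.
new_max_tokens = int(sample_bounded_normal(mean=10, stddev=20, lower=1))
assert new_max_tokens >= 1
```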
@@ -24,13 +24,13 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import random
 from typing import Any, Dict
 
 from genai_perf.inputs.converters.base_converter import BaseConverter
 from genai_perf.inputs.input_constants import DEFAULT_OUTPUT_TOKENS_MEAN
 from genai_perf.inputs.inputs_config import InputsConfig
 from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
+from genai_perf.utils import sample_bounded_normal
 
 
 class OpenAICompletionsConverter(BaseConverter):
@@ -59,7 +59,11 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:
             payload["stream"] = True
         if config.output_tokens_mean != DEFAULT_OUTPUT_TOKENS_MEAN:
             payload["max_tokens"] = int(
-                random.gauss(config.output_tokens_mean, config.output_tokens_stddev)
+                sample_bounded_normal(
+                    mean=config.output_tokens_mean,
+                    stddev=config.output_tokens_stddev,
+                    lower=1,  # output token must be >= 1
+                )
             )
         for key, value in config.extra_inputs.items():
             payload[key] = value
@@ -24,7 +24,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import random
 from typing import Any, Dict
 
 from genai_perf.exceptions import GenAIPerfException
@@ -36,6 +35,7 @@
 )
 from genai_perf.inputs.inputs_config import InputsConfig
 from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
+from genai_perf.utils import sample_bounded_normal
 
 
 class TensorRTLLMConverter(BaseConverter):
@@ -71,7 +71,11 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:
             payload["stream"] = [True]
         if config.output_tokens_mean != DEFAULT_OUTPUT_TOKENS_MEAN:
             number_of_tokens = int(
-                random.gauss(config.output_tokens_mean, config.output_tokens_stddev)
+                sample_bounded_normal(
+                    mean=config.output_tokens_mean,
+                    stddev=config.output_tokens_stddev,
+                    lower=1,  # output token must be >= 1
+                )
             )
             if config.output_tokens_deterministic:
                 payload["min_length"] = [number_of_tokens]
@@ -24,7 +24,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import random
 from typing import Any, Dict
 
 from genai_perf.exceptions import GenAIPerfException
@@ -36,6 +35,7 @@
 )
 from genai_perf.inputs.inputs_config import InputsConfig
 from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
+from genai_perf.utils import sample_bounded_normal
 
 
 class TensorRTLLMEngineConverter(BaseConverter):
@@ -71,7 +71,11 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:
             payload["streaming"] = [True]
         if config.output_tokens_mean != DEFAULT_OUTPUT_TOKENS_MEAN:
             num_tokens = int(
-                random.gauss(config.output_tokens_mean, config.output_tokens_stddev)
+                sample_bounded_normal(
+                    mean=config.output_tokens_mean,
+                    stddev=config.output_tokens_stddev,
+                    lower=1,  # output token must be >= 1
+                )
             )
             payload["request_output_len"] = [num_tokens]
             if config.output_tokens_deterministic:
20 changes: 8 additions & 12 deletions genai-perf/genai_perf/inputs/converters/vllm_converter.py
@@ -25,7 +25,6 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import json
-import random
 from typing import Any, Dict
 
 from genai_perf.exceptions import GenAIPerfException
@@ -36,6 +35,7 @@
 )
 from genai_perf.inputs.inputs_config import InputsConfig
 from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
+from genai_perf.utils import sample_bounded_normal
 
 
 class VLLMConverter(BaseConverter):
@@ -70,22 +70,18 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:
         if config.add_stream:
             payload["stream"] = [True]
         if config.output_tokens_mean != DEFAULT_OUTPUT_TOKENS_MEAN:
-            number_of_tokens = str(
-                int(
-                    max(
-                        0,
-                        random.gauss(
-                            config.output_tokens_mean,
-                            config.output_tokens_stddev,
-                        ),
-                    )
-                )
+            number_of_tokens = int(
+                sample_bounded_normal(
+                    mean=config.output_tokens_mean,
+                    stddev=config.output_tokens_stddev,
+                    lower=1,  # output token must be >= 1
+                )
             )
             sampling_parameters = {
-                "max_tokens": number_of_tokens,
+                "max_tokens": f"{number_of_tokens}",
             }
             if config.output_tokens_deterministic:
-                sampling_parameters["min_tokens"] = number_of_tokens
+                sampling_parameters["min_tokens"] = f"{number_of_tokens}"
             sampling_parameters_str = json.dumps(sampling_parameters)
             payload["sampling_parameters"] = [sampling_parameters_str]
         for key, value in config.extra_inputs.items():
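One vLLM-specific detail: the token counts are embedded as strings inside the JSON-serialized `sampling_parameters` field, with `min_tokens` included only in deterministic mode. A sketch of the resulting payload field, using an illustrative value of 128:

```python
import json

number_of_tokens = 128  # illustrative; really drawn via sample_bounded_normal

sampling_parameters = {
    "max_tokens": f"{number_of_tokens}",
    "min_tokens": f"{number_of_tokens}",  # only set when output_tokens_deterministic
}
payload = {"sampling_parameters": [json.dumps(sampling_parameters)]}
print(payload["sampling_parameters"][0])
# -> {"max_tokens": "128", "min_tokens": "128"}
```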
8 changes: 4 additions & 4 deletions genai-perf/genai_perf/inputs/inputs_config.py
@@ -46,7 +46,7 @@
     PromptSource,
 )
 from genai_perf.inputs.retrievers.synthetic_image_generator import ImageFormat
-from genai_perf.tokenizer import DEFAULT_TOKENIZER, Tokenizer, get_tokenizer
+from genai_perf.tokenizer import Tokenizer
 
 
 @dataclass
@@ -59,6 +59,9 @@ class InputsConfig:
     # General Parameters
     ####################
 
+    # The tokenizer to use when generating synthetic prompts
+    tokenizer: Tokenizer
+
     # If true, adds a steam field to each payload
     add_stream: bool = False
 
@@ -139,6 +142,3 @@ class InputsConfig:
 
     # Seed used to generate random values
     random_seed: int = DEFAULT_RANDOM_SEED
-
-    # The tokenizer to use when generating synthetic prompts
-    tokenizer: Tokenizer = get_tokenizer(DEFAULT_TOKENIZER)
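Moving the `tokenizer` field to the top of the dataclass without a default value makes it a required constructor argument, so an `InputsConfig` can no longer be built (with a default tokenizer silently downloaded) implicitly. A construction sketch under that assumption, using the new `get_empty_tokenizer` helper from later in this diff:

```python
from genai_perf.inputs.inputs_config import InputsConfig
from genai_perf.tokenizer import get_empty_tokenizer

# The tokenizer must now be supplied explicitly; the other fields keep defaults.
config = InputsConfig(tokenizer=get_empty_tokenizer())
```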
@@ -28,7 +28,6 @@
 from typing import List
 
 from genai_perf.inputs.input_constants import DEFAULT_SYNTHETIC_FILENAME
-from genai_perf.inputs.inputs_config import InputsConfig
 from genai_perf.inputs.retrievers.base_input_retriever import BaseInputRetriever
 from genai_perf.inputs.retrievers.generic_dataset import (
     DataRow,
2 changes: 2 additions & 0 deletions genai-perf/genai_perf/parser.py
@@ -458,6 +458,7 @@ def _add_input_args(parser):
 
     input_group.add_argument(
         "--output-tokens-mean",
+        "--osl",
         type=int,
         default=ic.DEFAULT_OUTPUT_TOKENS_MEAN,
         required=False,
@@ -497,6 +498,7 @@ def _add_input_args(parser):
 
     input_group.add_argument(
         "--synthetic-input-tokens-mean",
+        "--isl",
     type=int,
         default=ic.DEFAULT_PROMPT_TOKENS_MEAN,
         required=False,
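`--osl` and `--isl` are plain argparse aliases: each is registered as an extra option string on the existing argument, so both spellings write to the same destination. A standalone sketch of the mechanism with a toy parser (not the genai-perf CLI itself; the defaults are illustrative):

```python
import argparse

parser = argparse.ArgumentParser()
# The first long option string determines the attribute name (dest).
parser.add_argument("--output-tokens-mean", "--osl", type=int, default=-1)
parser.add_argument("--synthetic-input-tokens-mean", "--isl", type=int, default=550)

args = parser.parse_args(["--osl", "128"])
print(args.output_tokens_mean)           # 128
print(args.synthetic_input_tokens_mean)  # 550
```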
56 changes: 34 additions & 22 deletions genai-perf/genai_perf/tokenizer.py
@@ -14,18 +14,13 @@
 
 import contextlib
 import io
-from typing import List
+from typing import TYPE_CHECKING, List
 
-from genai_perf.exceptions import GenAIPerfException
-
-# Silence tokenizer warning on import
-with contextlib.redirect_stdout(io.StringIO()) as stdout, contextlib.redirect_stderr(
-    io.StringIO()
-) as stderr:
-    from transformers import AutoTokenizer, BatchEncoding
-    from transformers import logging as token_logger
+# Use TYPE_CHECKING to import BatchEncoding only during static type checks
+if TYPE_CHECKING:
+    from transformers import BatchEncoding
 
-    token_logger.set_verbosity_error()
+from genai_perf.exceptions import GenAIPerfException
 
 DEFAULT_TOKENIZER = "hf-internal-testing/llama-tokenizer"
 DEFAULT_TOKENIZER_REVISION = "main"
@@ -36,29 +31,37 @@ class Tokenizer:
     A small wrapper class around Huggingface Tokenizer
     """
 
-    def __init__(self, name: str, trust_remote_code: bool, revision: str) -> None:
+    def __init__(self) -> None:
         """
+        Initialize the tokenizer with default values
+        """
+
+        # default tokenizer parameters for __call__, encode, decode methods
+        self._call_args = {"add_special_tokens": False}
+        self._encode_args = {"add_special_tokens": False}
+        self._decode_args = {"skip_special_tokens": True}
+
+    def set_tokenizer(self, name: str, trust_remote_code: bool, revision: str):
+        """
-        Initialize by downloading the tokenizer from Huggingface.co
+        Downloading the tokenizer from Huggingface.co or local filesystem
         """
         try:
-            # Silence tokenizer warning on first use
+            # Silence tokenizer warning on import and first use
             with contextlib.redirect_stdout(
                 io.StringIO()
-            ) as stdout, contextlib.redirect_stderr(io.StringIO()) as stderr:
+            ) as stdout, contextlib.redirect_stderr(io.StringIO()):
+                from transformers import AutoTokenizer
+                from transformers import logging as token_logger
+
+                token_logger.set_verbosity_error()
                 tokenizer = AutoTokenizer.from_pretrained(
                     name, trust_remote_code=trust_remote_code, revision=revision
                 )
         except Exception as e:
             raise GenAIPerfException(e)
 
         self._tokenizer = tokenizer
-
-        # default tokenizer parameters for __call__, encode, decode methods
-        self._call_args = {"add_special_tokens": False}
-        self._encode_args = {"add_special_tokens": False}
-        self._decode_args = {"skip_special_tokens": True}
 
-    def __call__(self, text, **kwargs) -> BatchEncoding:
+    def __call__(self, text, **kwargs) -> "BatchEncoding":
         self._call_args.update(kwargs)
         return self._tokenizer(text, **self._call_args)
 
@@ -74,6 +77,13 @@ def __repr__(self) -> str:
         return self._tokenizer.__repr__()
 
 
+def get_empty_tokenizer() -> Tokenizer:
+    """
+    Return a Tokenizer without a tokenizer set
+    """
+    return Tokenizer()
+
+
 def get_tokenizer(
     tokenizer_model: str,
     trust_remote_code: bool = False,
@@ -82,4 +92,6 @@ def get_tokenizer(
     """
     Return tokenizer for the given model name
     """
-    return Tokenizer(tokenizer_model, trust_remote_code, tokenizer_revision)
+    tokenizer = Tokenizer()
+    tokenizer.set_tokenizer(tokenizer_model, trust_remote_code, tokenizer_revision)
+    return tokenizer
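The refactor separates cheap construction from the heavyweight download: `Tokenizer()` no longer touches `transformers`, which is now imported lazily inside `set_tokenizer`. A usage sketch, assuming `genai_perf` and `transformers` are installed:

```python
from genai_perf.tokenizer import DEFAULT_TOKENIZER, get_empty_tokenizer, get_tokenizer

# One-step path, same behavior as before: download the tokenizer immediately.
tokenizer = get_tokenizer(DEFAULT_TOKENIZER)
print(tokenizer.encode("hello"))

# Two-step path: construct now, download only when actually needed.
lazy = get_empty_tokenizer()
lazy.set_tokenizer(DEFAULT_TOKENIZER, trust_remote_code=False, revision="main")
```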
9 changes: 9 additions & 0 deletions genai-perf/genai_perf/utils.py
@@ -25,6 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import json
+import random
 from enum import Enum
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Type
@@ -118,3 +119,11 @@ def get_enum_entry(name: str, enum: Type[Enum]) -> Optional[Enum]:
 
 def scale(value, factor):
     return value * factor
+
+
+def sample_bounded_normal(mean, stddev, lower=float("-inf"), upper=float("inf")):
+    """Bound random normal sampling to [lower, upper]. Set the final value to
+    the boundary value if the value goes below or above the boundaries.
+    """
+    n = random.gauss(mean, stddev)
+    return min(max(lower, n), upper)
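A quick sanity check of the helper's clamping at both bounds — a sketch assuming the `genai_perf` package is importable; the near-zero stddev just makes the draws effectively deterministic:

```python
import random

from genai_perf.utils import sample_bounded_normal

random.seed(0)

# A draw far below `lower` is pulled up to the boundary...
assert sample_bounded_normal(mean=0, stddev=1e-12, lower=5) == 5

# ...and a draw far above `upper` is pulled down to it.
assert sample_bounded_normal(mean=100, stddev=1e-12, upper=10) == 10
```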