From 4312cb42788949c0c7a8a57d5fc65ceed6c4c0ea Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Wed, 4 Sep 2024 22:09:59 +0000 Subject: [PATCH 01/45] Add mean and percentile info as computed_field properties such that they become serializable --- src/guidellm/core/report.py | 26 ++++------- src/guidellm/core/result.py | 92 ++++++++++++++++++++++++++++++++++++- 2 files changed, 99 insertions(+), 19 deletions(-) diff --git a/src/guidellm/core/report.py b/src/guidellm/core/report.py index b6791e4..c48eed5 100644 --- a/src/guidellm/core/report.py +++ b/src/guidellm/core/report.py @@ -147,19 +147,15 @@ def _create_benchmark_report_data_tokens_summary( for benchmark in report.benchmarks_sorted: table.add_row( _benchmark_rate_id(benchmark), - f"{benchmark.prompt_token_distribution.mean:.2f}", + f"{benchmark.prompt_token:.2f}", ", ".join( f"{percentile:.1f}" - for percentile in benchmark.prompt_token_distribution.percentiles( - [1, 5, 50, 95, 99] - ) + for percentile in benchmark.prompt_token_percentiles ), - f"{benchmark.output_token_distribution.mean:.2f}", + f"{benchmark.output_token:.2f}", ", ".join( f"{percentile:.1f}" - for percentile in benchmark.output_token_distribution.percentiles( - [1, 5, 50, 95, 99] - ) + for percentile in benchmark.output_token_percentiles ), ) logger.debug("Created data tokens summary table for the report.") @@ -181,7 +177,7 @@ def _create_benchmark_report_dist_perf_summary( "Benchmark", "Request Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (sec)", "Time to First Token [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)", - "Inter Token Latency [1%, 5%, 10%, 50%, 90% 95%, 99%] (ms)", + "Inter Token Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)", title="[magenta]Performance Stats by Benchmark[/magenta]", title_style="bold", title_justify="left", @@ -193,21 +189,15 @@ def _create_benchmark_report_dist_perf_summary( _benchmark_rate_id(benchmark), ", ".join( f"{percentile:.2f}" - for percentile in benchmark.request_latency_distribution.percentiles( - [1, 5, 10, 50, 90, 95, 99] - ) + for percentile in benchmark.request_latency_percentiles ), ", ".join( f"{percentile * 1000:.1f}" - for percentile in benchmark.ttft_distribution.percentiles( - [1, 5, 10, 50, 90, 95, 99] - ) + for percentile in benchmark.time_to_first_token_percentiles ), ", ".join( f"{percentile * 1000:.1f}" - for percentile in benchmark.itl_distribution.percentiles( - [1, 5, 10, 50, 90, 95, 99] - ) + for percentile in benchmark.inter_token_latency_percentiles ), ) logger.debug("Created distribution performance summary table for the report.") diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py index f218784..5fd29a8 100644 --- a/src/guidellm/core/result.py +++ b/src/guidellm/core/result.py @@ -2,7 +2,7 @@ from typing import Any, Dict, List, Literal, Optional, Union from loguru import logger -from pydantic import Field +from pydantic import Field, computed_field from guidellm.core.distribution import Distribution from guidellm.core.request import TextGenerationRequest @@ -221,6 +221,7 @@ def __iter__(self): """ return iter(self.results) + @computed_field @property def request_count(self) -> int: """ @@ -231,6 +232,7 @@ def request_count(self) -> int: """ return len(self.results) + @computed_field @property def error_count(self) -> int: """ @@ -241,6 +243,7 @@ def error_count(self) -> int: """ return len(self.errors) + @computed_field @property def total_count(self) -> int: """ @@ -251,6 +254,7 @@ def total_count(self) -> int: """ return self.request_count + self.error_count + @computed_field 
@property def start_time(self) -> Optional[float]: """ @@ -264,6 +268,7 @@ def start_time(self) -> Optional[float]: return self.results[0].start_time + @computed_field @property def end_time(self) -> Optional[float]: """ @@ -277,6 +282,7 @@ def end_time(self) -> Optional[float]: return self.results[-1].end_time + @computed_field @property def duration(self) -> float: """ @@ -290,6 +296,7 @@ def duration(self) -> float: return self.end_time - self.start_time + @computed_field @property def completed_request_rate(self) -> float: """ @@ -303,6 +310,7 @@ def completed_request_rate(self) -> float: return len(self.results) / self.duration + @computed_field @property def request_latency(self) -> float: """ @@ -332,6 +340,19 @@ def request_latency_distribution(self) -> Distribution: ] ) + @computed_field + @property + def request_latency_percentiles(self) -> List[float]: + """ + Get standard percentiles of request latency in seconds. + + :return: List of percentile request latency in seconds + :rtype: List[float] + """ + return self.request_latency_distribution.percentiles([1, 5, 10, 50, 90, 95, 99]) + + + @computed_field @property def time_to_first_token(self) -> float: """ @@ -360,7 +381,19 @@ def ttft_distribution(self) -> Distribution: if result.first_token_time is not None ] ) + + @computed_field + @property + def time_to_first_token_percentiles(self) -> List[float]: + """ + Get standard percentiles for time taken to decode the first token in milliseconds. + + :return: List of percentile time taken to decode the first token in milliseconds. + :rtype: List[float] + """ + return self.ttft_distribution.percentiles([1, 5, 10, 50, 90, 95, 99]) + @computed_field @property def inter_token_latency(self) -> float: """ @@ -387,7 +420,19 @@ def itl_distribution(self) -> Distribution: decode for result in self.results for decode in result.decode_times.data ] ) + + @computed_field + @property + def inter_token_latency_percentiles(self) -> List[float]: + """ + Get standard percentiles for the time between tokens in milliseconds. + :return: List of percentiles for the average time between tokens. + :rtype: List[float] + """ + return self.itl_distribution.percentiles([1, 5, 10, 50, 90, 95, 99]) + + @computed_field @property def output_token_throughput(self) -> float: """ @@ -403,6 +448,17 @@ def output_token_throughput(self) -> float: return total_tokens / self.duration + @computed_field + @property + def prompt_token(self) -> float: + """ + Get the average number of prompt tokens. + + :return: The average number of prompt tokens. + :rtype: float + """ + return self.prompt_token_distribution.mean + @property def prompt_token_distribution(self) -> Distribution: """ @@ -413,6 +469,28 @@ def prompt_token_distribution(self) -> Distribution: """ return Distribution(data=[result.prompt_token_count for result in self.results]) + @computed_field + @property + def prompt_token_percentiles(self) -> List[float]: + """ + Get standard percentiles for number of prompt tokens. + + :return: List of percentiles of number of prompt tokens. + :rtype: List[float] + """ + return self.prompt_token_distribution.percentiles([1, 5, 50, 95, 99]) + + @computed_field + @property + def output_token(self) -> float: + """ + Get the average number of output tokens. + + :return: The average number of output tokens. 
+ :rtype: float + """ + return self.output_token_distribution.mean + @property def output_token_distribution(self) -> Distribution: """ @@ -423,6 +501,18 @@ def output_token_distribution(self) -> Distribution: """ return Distribution(data=[result.output_token_count for result in self.results]) + @computed_field + @property + def output_token_percentiles(self) -> List[float]: + """ + Get standard percentiles for number of output tokens. + + :return: List of percentiles of number of output tokens. + :rtype: List[float] + """ + return self.output_token_distribution.percentiles([1, 5, 50, 95, 99]) + + @computed_field @property def overloaded(self) -> bool: if ( From 46e10764262ac05cbc7605bc65e4a2fad5b597a2 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 5 Sep 2024 01:19:42 +0000 Subject: [PATCH 02/45] quality fixes --- src/guidellm/core/result.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py index 5fd29a8..90906b7 100644 --- a/src/guidellm/core/result.py +++ b/src/guidellm/core/result.py @@ -381,7 +381,7 @@ def ttft_distribution(self) -> Distribution: if result.first_token_time is not None ] ) - + @computed_field @property def time_to_first_token_percentiles(self) -> List[float]: @@ -390,7 +390,7 @@ def time_to_first_token_percentiles(self) -> List[float]: :return: List of percentile time taken to decode the first token in milliseconds. :rtype: List[float] - """ + """ return self.ttft_distribution.percentiles([1, 5, 10, 50, 90, 95, 99]) @computed_field @@ -420,7 +420,7 @@ def itl_distribution(self) -> Distribution: decode for result in self.results for decode in result.decode_times.data ] ) - + @computed_field @property def inter_token_latency_percentiles(self) -> List[float]: From 65fafdefc154a7184926c162fbe90a676cf5870e Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 5 Sep 2024 01:23:25 +0000 Subject: [PATCH 03/45] quality fix --- src/guidellm/core/result.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py index 90906b7..95a4230 100644 --- a/src/guidellm/core/result.py +++ b/src/guidellm/core/result.py @@ -386,9 +386,11 @@ def ttft_distribution(self) -> Distribution: @property def time_to_first_token_percentiles(self) -> List[float]: """ - Get standard percentiles for time taken to decode the first token in milliseconds. + Get standard percentiles for time taken to decode the first token + in milliseconds. - :return: List of percentile time taken to decode the first token in milliseconds. + :return: List of percentile time taken to decode the first token + in milliseconds. 
:rtype: List[float] """ return self.ttft_distribution.percentiles([1, 5, 10, 50, 90, 95, 99]) From cc8d2c67c33affb6d99c2e86a090fb803388e098 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Fri, 6 Sep 2024 14:28:54 +0000 Subject: [PATCH 04/45] Quality fixes --- src/guidellm/core/result.py | 38 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py index 95a4230..aebd176 100644 --- a/src/guidellm/core/result.py +++ b/src/guidellm/core/result.py @@ -221,7 +221,7 @@ def __iter__(self): """ return iter(self.results) - @computed_field + @computed_field # type: ignore[misc] @property def request_count(self) -> int: """ @@ -232,7 +232,7 @@ def request_count(self) -> int: """ return len(self.results) - @computed_field + @computed_field # type: ignore[misc] @property def error_count(self) -> int: """ @@ -243,7 +243,7 @@ def error_count(self) -> int: """ return len(self.errors) - @computed_field + @computed_field # type: ignore[misc] @property def total_count(self) -> int: """ @@ -254,7 +254,7 @@ def total_count(self) -> int: """ return self.request_count + self.error_count - @computed_field + @computed_field # type: ignore[misc] @property def start_time(self) -> Optional[float]: """ @@ -268,7 +268,7 @@ def start_time(self) -> Optional[float]: return self.results[0].start_time - @computed_field + @computed_field # type: ignore[misc] @property def end_time(self) -> Optional[float]: """ @@ -282,7 +282,7 @@ def end_time(self) -> Optional[float]: return self.results[-1].end_time - @computed_field + @computed_field # type: ignore[misc] @property def duration(self) -> float: """ @@ -296,7 +296,7 @@ def duration(self) -> float: return self.end_time - self.start_time - @computed_field + @computed_field # type: ignore[misc] @property def completed_request_rate(self) -> float: """ @@ -310,7 +310,7 @@ def completed_request_rate(self) -> float: return len(self.results) / self.duration - @computed_field + @computed_field # type: ignore[misc] @property def request_latency(self) -> float: """ @@ -340,7 +340,7 @@ def request_latency_distribution(self) -> Distribution: ] ) - @computed_field + @computed_field # type: ignore[misc] @property def request_latency_percentiles(self) -> List[float]: """ @@ -352,7 +352,7 @@ def request_latency_percentiles(self) -> List[float]: return self.request_latency_distribution.percentiles([1, 5, 10, 50, 90, 95, 99]) - @computed_field + @computed_field # type: ignore[misc] @property def time_to_first_token(self) -> float: """ @@ -382,7 +382,7 @@ def ttft_distribution(self) -> Distribution: ] ) - @computed_field + @computed_field # type: ignore[misc] @property def time_to_first_token_percentiles(self) -> List[float]: """ @@ -395,7 +395,7 @@ def time_to_first_token_percentiles(self) -> List[float]: """ return self.ttft_distribution.percentiles([1, 5, 10, 50, 90, 95, 99]) - @computed_field + @computed_field # type: ignore[misc] @property def inter_token_latency(self) -> float: """ @@ -423,7 +423,7 @@ def itl_distribution(self) -> Distribution: ] ) - @computed_field + @computed_field # type: ignore[misc] @property def inter_token_latency_percentiles(self) -> List[float]: """ @@ -434,7 +434,7 @@ def inter_token_latency_percentiles(self) -> List[float]: """ return self.itl_distribution.percentiles([1, 5, 10, 50, 90, 95, 99]) - @computed_field + @computed_field # type: ignore[misc] @property def output_token_throughput(self) -> float: """ @@ -450,7 +450,7 @@ def 
output_token_throughput(self) -> float: return total_tokens / self.duration - @computed_field + @computed_field # type: ignore[misc] @property def prompt_token(self) -> float: """ @@ -471,7 +471,7 @@ def prompt_token_distribution(self) -> Distribution: """ return Distribution(data=[result.prompt_token_count for result in self.results]) - @computed_field + @computed_field # type: ignore[misc] @property def prompt_token_percentiles(self) -> List[float]: """ @@ -482,7 +482,7 @@ def prompt_token_percentiles(self) -> List[float]: """ return self.prompt_token_distribution.percentiles([1, 5, 50, 95, 99]) - @computed_field + @computed_field # type: ignore[misc] @property def output_token(self) -> float: """ @@ -503,7 +503,7 @@ def output_token_distribution(self) -> Distribution: """ return Distribution(data=[result.output_token_count for result in self.results]) - @computed_field + @computed_field # type: ignore[misc] @property def output_token_percentiles(self) -> List[float]: """ @@ -514,7 +514,7 @@ def output_token_percentiles(self) -> List[float]: """ return self.output_token_distribution.percentiles([1, 5, 50, 95, 99]) - @computed_field + @computed_field # type: ignore[misc] @property def overloaded(self) -> bool: if ( From bb9bc0c1b8d72fc34d2acce8421e15a00077e4c2 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Mon, 4 Nov 2024 19:07:22 +0000 Subject: [PATCH 05/45] Add class to describe image samples and loading logic for images from url --- src/guidellm/utils/images.py | 69 ++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 src/guidellm/utils/images.py diff --git a/src/guidellm/utils/images.py b/src/guidellm/utils/images.py new file mode 100644 index 0000000..5e96ce1 --- /dev/null +++ b/src/guidellm/utils/images.py @@ -0,0 +1,69 @@ +from PIL import Image +from bs4 import BeautifulSoup +from urllib.parse import urljoin, urlparse +from pydantic import Field, ConfigDict +from typing import List, Optional +from io import BytesIO + +from loguru import logger + +import requests + +from guidellm.config import settings +from guidellm.core.serializable import Serializable + +__all__ = ["load_images", "ImageDescriptor"] + +class ImageDescriptor(Serializable): + """ + A class to represent image data in serializable format. 
+ """ + model_config = ConfigDict(arbitrary_types_allowed=True) + + url: Optional[str] = Field(description="url address for image.") + image: Image.Image = Field(description="PIL image", exclude=True) + filename: Optional[int] = Field( + default=None, + description="Image filename.", + ) + + +def load_images(data: str) -> List[ImageDescriptor]: + """ + Load an HTML file from a path or URL + + :param data: the path or URL to load the HTML file from + :type data: Union[str, Path] + :return: Descriptor containing image url and the data in PIL.Image.Image format + :rtype: ImageDescriptor + """ + + images = [] + if not data: + return None + if isinstance(data, str) and data.startswith("http"): + response = requests.get(data, timeout=settings.request_timeout) + response.raise_for_status() + + soup = BeautifulSoup(response.text, 'html.parser') + for img_tag in soup.find_all("img"): + img_url = img_tag.get("src") + + if img_url: + # Handle relative URLs + img_url = urljoin(data, img_url) + + # Download the image + logger.debug("Loading image: {}", img_url) + img_response = requests.get(img_url) + img_response.raise_for_status() + + # Load image into Pillow + images.append( + ImageDescriptor( + url=img_url, + image=Image.open(BytesIO(img_response.content)), + ) + ) + + return images \ No newline at end of file From 59002b511339a22833a20a51023c40b680a1a3f5 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Mon, 4 Nov 2024 19:07:49 +0000 Subject: [PATCH 06/45] Add class to describe image samples and loading logic for images from url --- src/guidellm/utils/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 2fdd8ca..6f2f669 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -12,6 +12,7 @@ split_lines_by_punctuation, split_text, ) +from .images import load_images, ImageDescriptor from .transformers import ( load_transformers_dataset, resolve_transformers_dataset, From cb1f244ac5ce15dad230bbf84f541868b3ffa393 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Mon, 4 Nov 2024 19:10:01 +0000 Subject: [PATCH 07/45] Add url used to download images from for emulated requests --- src/guidellm/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/guidellm/config.py b/src/guidellm/config.py index c3d950e..df750ea 100644 --- a/src/guidellm/config.py +++ b/src/guidellm/config.py @@ -90,6 +90,7 @@ class EmulatedDataSettings(BaseModel): "force_new_line_punctuation": True, } ) + image_source: List[str] = "https://www.gutenberg.org/cache/epub/1342/pg1342-images.html" class OpenAISettings(BaseModel): From 24e652721ef3fe8268754f5802cb49e878145384 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Mon, 4 Nov 2024 19:10:24 +0000 Subject: [PATCH 08/45] Add support to images in requests --- src/guidellm/backend/openai.py | 25 ++++++++++++++++++++++--- src/guidellm/core/request.py | 15 ++++++++++++++- src/guidellm/request/emulated.py | 17 +++++++++++++++-- 3 files changed, 51 insertions(+), 6 deletions(-) diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index 8c83f91..c740b34 100644 --- a/src/guidellm/backend/openai.py +++ b/src/guidellm/backend/openai.py @@ -1,4 +1,5 @@ from typing import AsyncGenerator, Dict, List, Optional +import io, base64 from loguru import logger from openai import AsyncOpenAI, OpenAI @@ -103,11 +104,11 @@ async def make_request( request_args.update(self._request_args) + messages = self._build_messages(request) + stream = await 
self._async_client.chat.completions.create( model=self.model, - messages=[ - {"role": "user", "content": request.prompt}, - ], + messages=messages, stream=True, **request_args, ) @@ -167,3 +168,21 @@ def validate_connection(self): except Exception as error: logger.error("Failed to validate OpenAI connection: {}", error) raise error + + def _build_messages(self, request: TextGenerationRequest) -> Dict: + if request.number_images == 0: + messages = [{"role": "user", "content": request.prompt}] + else: + content = [] + for image in request.images: + stream = io.BytesIO() + im_format = image.image.format or "PNG" + image.image.save(stream, format=im_format) + im_b64 = base64.b64encode(stream.getvalue()).decode("ascii") + image_url = {"url": f"data:image/{im_format.lower()};base64,{im_b64}"} + content.append({"type": "image_url", "image_url": image_url}) + + content.append({"type": "text", "text": request.prompt}) + messages = [{"role": "user", "content": content}] + + return messages diff --git a/src/guidellm/core/request.py b/src/guidellm/core/request.py index 4f7315c..8f93b56 100644 --- a/src/guidellm/core/request.py +++ b/src/guidellm/core/request.py @@ -1,9 +1,10 @@ import uuid -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, List from pydantic import Field from guidellm.core.serializable import Serializable +from guidellm.utils import ImageDescriptor class TextGenerationRequest(Serializable): @@ -16,6 +17,10 @@ class TextGenerationRequest(Serializable): description="The unique identifier for the request.", ) prompt: str = Field(description="The input prompt for the text generation.") + images: Optional[List[ImageDescriptor]] = Field( + default=None, + description="Input images.", + ) prompt_token_count: Optional[int] = Field( default=None, description="The number of tokens in the input prompt.", @@ -29,6 +34,13 @@ class TextGenerationRequest(Serializable): description="The parameters for the text generation request.", ) + @property + def number_images(self) -> int: + if self.images is None: + return 0 + else: + return len(self.images) + def __str__(self) -> str: prompt_short = ( self.prompt[:32] + "..." @@ -41,4 +53,5 @@ def __str__(self) -> str: f"prompt={prompt_short}, prompt_token_count={self.prompt_token_count}, " f"output_token_count={self.output_token_count}, " f"params={self.params})" + f"images={self.number_images}" ) diff --git a/src/guidellm/request/emulated.py b/src/guidellm/request/emulated.py index 7d481cb..b7053de 100644 --- a/src/guidellm/request/emulated.py +++ b/src/guidellm/request/emulated.py @@ -11,7 +11,7 @@ from guidellm.config import settings from guidellm.core.request import TextGenerationRequest from guidellm.request.base import GenerationMode, RequestGenerator -from guidellm.utils import clean_text, filter_text, load_text, split_text +from guidellm.utils import clean_text, filter_text, load_text, split_text, load_images __all__ = ["EmulatedConfig", "EmulatedRequestGenerator", "EndlessTokens"] @@ -30,6 +30,7 @@ class EmulatedConfig: generated_tokens_variance (Optional[int]): Variance for generated tokens. generated_tokens_min (Optional[int]): Minimum number of generated tokens. generated_tokens_max (Optional[int]): Maximum number of generated tokens. + images (Optional[int]): Number of input images. 
""" @staticmethod @@ -47,7 +48,7 @@ def create_config(config: Optional[Union[str, Path, Dict]]) -> "EmulatedConfig": """ if not config: logger.debug("Creating default configuration") - return EmulatedConfig(prompt_tokens=1024, generated_tokens=256) + return EmulatedConfig(prompt_tokens=1024, generated_tokens=256, images=0) if isinstance(config, dict): logger.debug("Loading configuration from dict: {}", config) @@ -105,6 +106,8 @@ def create_config(config: Optional[Union[str, Path, Dict]]) -> "EmulatedConfig": generated_tokens_min: Optional[int] = None generated_tokens_max: Optional[int] = None + images: int = 0 + @property def prompt_tokens_range(self) -> Tuple[int, int]: """ @@ -327,6 +330,8 @@ def __init__( settings.emulated_data.filter_start, settings.emulated_data.filter_end, ) + if self._config.images > 0: + self._images = load_images(settings.emulated_data.image_source) self._rng = np.random.default_rng(random_seed) # NOTE: Must be after all the parameters since the queue population @@ -355,6 +360,7 @@ def create_item(self) -> TextGenerationRequest: logger.debug("Creating new text generation request") target_prompt_token_count = self._config.sample_prompt_tokens(self._rng) prompt = self.sample_prompt(target_prompt_token_count) + images = self.sample_images() prompt_token_count = len(self.tokenizer.tokenize(prompt)) output_token_count = self._config.sample_output_tokens(self._rng) logger.debug("Generated prompt: {}", prompt) @@ -363,6 +369,7 @@ def create_item(self) -> TextGenerationRequest: prompt=prompt, prompt_token_count=prompt_token_count, output_token_count=output_token_count, + images=images, ) def sample_prompt(self, tokens: int) -> str: @@ -395,3 +402,9 @@ def sample_prompt(self, tokens: int) -> str: right = mid return self._tokens.create_text(start_line_index, left) + + + def sample_images(self): + image_indices = self._rng.choice(len(self._images), size=self._config.images, replace=False) + + return [self._images[i] for i in image_indices] \ No newline at end of file From 394670999785536b696978eae411cbcf7c4583cd Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Mon, 4 Nov 2024 19:45:56 +0000 Subject: [PATCH 09/45] quality fixes --- src/guidellm/backend/openai.py | 7 ++++--- src/guidellm/core/request.py | 2 +- src/guidellm/request/emulated.py | 8 ++++---- src/guidellm/utils/__init__.py | 2 +- src/guidellm/utils/images.py | 27 +++++++++++++-------------- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index c740b34..f75bb3b 100644 --- a/src/guidellm/backend/openai.py +++ b/src/guidellm/backend/openai.py @@ -1,5 +1,6 @@ +import base64 +import io from typing import AsyncGenerator, Dict, List, Optional -import io, base64 from loguru import logger from openai import AsyncOpenAI, OpenAI @@ -182,7 +183,7 @@ def _build_messages(self, request: TextGenerationRequest) -> Dict: image_url = {"url": f"data:image/{im_format.lower()};base64,{im_b64}"} content.append({"type": "image_url", "image_url": image_url}) - content.append({"type": "text", "text": request.prompt}) + content.append({"type": "text", "text": request.prompt}) messages = [{"role": "user", "content": content}] - + return messages diff --git a/src/guidellm/core/request.py b/src/guidellm/core/request.py index 8f93b56..a1ff199 100644 --- a/src/guidellm/core/request.py +++ b/src/guidellm/core/request.py @@ -1,5 +1,5 @@ import uuid -from typing import Any, Dict, Optional, List +from typing import Any, Dict, List, Optional from pydantic import 
Field diff --git a/src/guidellm/request/emulated.py b/src/guidellm/request/emulated.py index b7053de..9dc3825 100644 --- a/src/guidellm/request/emulated.py +++ b/src/guidellm/request/emulated.py @@ -11,7 +11,7 @@ from guidellm.config import settings from guidellm.core.request import TextGenerationRequest from guidellm.request.base import GenerationMode, RequestGenerator -from guidellm.utils import clean_text, filter_text, load_text, split_text, load_images +from guidellm.utils import clean_text, filter_text, load_images, load_text, split_text __all__ = ["EmulatedConfig", "EmulatedRequestGenerator", "EndlessTokens"] @@ -402,9 +402,9 @@ def sample_prompt(self, tokens: int) -> str: right = mid return self._tokens.create_text(start_line_index, left) - - + + def sample_images(self): image_indices = self._rng.choice(len(self._images), size=self._config.images, replace=False) - return [self._images[i] for i in image_indices] \ No newline at end of file + return [self._images[i] for i in image_indices] diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 6f2f669..1e51f22 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -1,3 +1,4 @@ +from .images import ImageDescriptor, load_images from .injector import create_report, inject_data from .progress import BenchmarkReportProgress from .text import ( @@ -12,7 +13,6 @@ split_lines_by_punctuation, split_text, ) -from .images import load_images, ImageDescriptor from .transformers import ( load_transformers_dataset, resolve_transformers_dataset, diff --git a/src/guidellm/utils/images.py b/src/guidellm/utils/images.py index 5e96ce1..5d73bc0 100644 --- a/src/guidellm/utils/images.py +++ b/src/guidellm/utils/images.py @@ -1,13 +1,12 @@ -from PIL import Image -from bs4 import BeautifulSoup -from urllib.parse import urljoin, urlparse -from pydantic import Field, ConfigDict -from typing import List, Optional from io import BytesIO - -from loguru import logger +from typing import List, Optional +from urllib.parse import urljoin import requests +from bs4 import BeautifulSoup +from loguru import logger +from PIL import Image +from pydantic import ConfigDict, Field from guidellm.config import settings from guidellm.core.serializable import Serializable @@ -19,14 +18,14 @@ class ImageDescriptor(Serializable): A class to represent image data in serializable format. 
""" model_config = ConfigDict(arbitrary_types_allowed=True) - + url: Optional[str] = Field(description="url address for image.") image: Image.Image = Field(description="PIL image", exclude=True) filename: Optional[int] = Field( default=None, description="Image filename.", ) - + def load_images(data: str) -> List[ImageDescriptor]: """ @@ -45,25 +44,25 @@ def load_images(data: str) -> List[ImageDescriptor]: response = requests.get(data, timeout=settings.request_timeout) response.raise_for_status() - soup = BeautifulSoup(response.text, 'html.parser') + soup = BeautifulSoup(response.text, "html.parser") for img_tag in soup.find_all("img"): img_url = img_tag.get("src") if img_url: # Handle relative URLs img_url = urljoin(data, img_url) - + # Download the image logger.debug("Loading image: {}", img_url) img_response = requests.get(img_url) img_response.raise_for_status() - + # Load image into Pillow images.append( ImageDescriptor( - url=img_url, + url=img_url, image=Image.open(BytesIO(img_response.content)), ) ) - return images \ No newline at end of file + return images From 7d93b020d34e28ebdf04e7bc39a13c867fa6ef97 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Mon, 4 Nov 2024 19:53:57 +0000 Subject: [PATCH 10/45] Quality fixes --- src/guidellm/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/guidellm/__init__.py b/src/guidellm/__init__.py index e562018..b10b445 100644 --- a/src/guidellm/__init__.py +++ b/src/guidellm/__init__.py @@ -6,6 +6,7 @@ # flake8: noqa import os + import transformers # type: ignore os.environ["TOKENIZERS_PARALLELISM"] = "false" # Silence warnings for tokenizers From a441dade284aa9e682c652430a739d7298c3e82e Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Mon, 4 Nov 2024 20:01:26 +0000 Subject: [PATCH 11/45] Quality fixes --- src/guidellm/request/emulated.py | 4 +++- src/guidellm/utils/__init__.py | 2 ++ src/guidellm/utils/images.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/guidellm/request/emulated.py b/src/guidellm/request/emulated.py index 9dc3825..f15387e 100644 --- a/src/guidellm/request/emulated.py +++ b/src/guidellm/request/emulated.py @@ -405,6 +405,8 @@ def sample_prompt(self, tokens: int) -> str: def sample_images(self): - image_indices = self._rng.choice(len(self._images), size=self._config.images, replace=False) + image_indices = self._rng.choice( + len(self._images), size=self._config.images, replace=False, + ) return [self._images[i] for i in image_indices] diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 1e51f22..eb4931b 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -38,4 +38,6 @@ "resolve_transformers_dataset_split", "split_lines_by_punctuation", "split_text", + "ImageDescriptor", + "load_images", ] diff --git a/src/guidellm/utils/images.py b/src/guidellm/utils/images.py index 5d73bc0..5c5a727 100644 --- a/src/guidellm/utils/images.py +++ b/src/guidellm/utils/images.py @@ -65,4 +65,4 @@ def load_images(data: str) -> List[ImageDescriptor]: ) ) - return images + return images From 570670b6c2a24869a40635f0112af7d92da0e73c Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Tue, 5 Nov 2024 01:11:06 +0000 Subject: [PATCH 12/45] Quality fixes --- src/guidellm/backend/openai.py | 2 +- src/guidellm/utils/images.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index f75bb3b..90d2791 100644 --- a/src/guidellm/backend/openai.py +++ 
b/src/guidellm/backend/openai.py @@ -179,7 +179,7 @@ def _build_messages(self, request: TextGenerationRequest) -> Dict: stream = io.BytesIO() im_format = image.image.format or "PNG" image.image.save(stream, format=im_format) - im_b64 = base64.b64encode(stream.getvalue()).decode("ascii") + im_b64 = base64.b64encode(stream.getvalue()).decode("utf-8") image_url = {"url": f"data:image/{im_format.lower()};base64,{im_b64}"} content.append({"type": "image_url", "image_url": image_url}) diff --git a/src/guidellm/utils/images.py b/src/guidellm/utils/images.py index 5c5a727..5d73bc0 100644 --- a/src/guidellm/utils/images.py +++ b/src/guidellm/utils/images.py @@ -65,4 +65,4 @@ def load_images(data: str) -> List[ImageDescriptor]: ) ) - return images + return images From 984da28e4423f1726888b6a37de30621029d3622 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Tue, 5 Nov 2024 02:43:04 +0000 Subject: [PATCH 13/45] Add new dependencies --- .pre-commit-config.yaml | 3 +++ pyproject.toml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2a085bb..6bcf150 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,6 +27,9 @@ repos: pyyaml, requests, rich, + pillow, + base64, + io, transformers, # dev dependencies diff --git a/pyproject.toml b/pyproject.toml index 6ab2c6e..b83abfd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,9 @@ dependencies = [ "pyyaml>=6.0.0", "requests", "rich", + "pillow", + "base64", + "io", "transformers", ] From 355f368b559d2b35e8d95fc7e761b53fce5cf15d Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Tue, 3 Dec 2024 22:54:54 +0000 Subject: [PATCH 14/45] Allow images to be resized to specific resolution --- src/guidellm/core/request.py | 12 ++++++++++-- src/guidellm/request/emulated.py | 10 ++++++++-- src/guidellm/utils/images.py | 6 +++++- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/guidellm/core/request.py b/src/guidellm/core/request.py index a1ff199..8585979 100644 --- a/src/guidellm/core/request.py +++ b/src/guidellm/core/request.py @@ -1,5 +1,5 @@ import uuid -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple from pydantic import Field @@ -41,6 +41,14 @@ def number_images(self) -> int: else: return len(self.images) + @property + def image_resolution(self) -> Tuple[int]: + if self.images is None: + return None + else: + return [im.size for im in self.images] + + def __str__(self) -> str: prompt_short = ( self.prompt[:32] + "..." @@ -53,5 +61,5 @@ def __str__(self) -> str: f"prompt={prompt_short}, prompt_token_count={self.prompt_token_count}, " f"output_token_count={self.output_token_count}, " f"params={self.params})" - f"images={self.number_images}" + f"image_resolution={self.image_resolution}" ) diff --git a/src/guidellm/request/emulated.py b/src/guidellm/request/emulated.py index f15387e..8818ff9 100644 --- a/src/guidellm/request/emulated.py +++ b/src/guidellm/request/emulated.py @@ -30,7 +30,8 @@ class EmulatedConfig: generated_tokens_variance (Optional[int]): Variance for generated tokens. generated_tokens_min (Optional[int]): Minimum number of generated tokens. generated_tokens_max (Optional[int]): Maximum number of generated tokens. - images (Optional[int]): Number of input images. + images (Optional[int]): Number of images. + image_resultion (Optional[List[int]]): Resolution of images. 
""" @staticmethod @@ -107,6 +108,11 @@ def create_config(config: Optional[Union[str, Path, Dict]]) -> "EmulatedConfig": generated_tokens_max: Optional[int] = None images: int = 0 + image_resolution = None + + def __post_init__(self): + if self.images is not None and self.image_resultion is not None and self.images > 0: + assert len(self.image_resolution) == 2 @property def prompt_tokens_range(self) -> Tuple[int, int]: @@ -331,7 +337,7 @@ def __init__( settings.emulated_data.filter_end, ) if self._config.images > 0: - self._images = load_images(settings.emulated_data.image_source) + self._images = load_images(settings.emulated_data.image_source, self._config.image_resolution) self._rng = np.random.default_rng(random_seed) # NOTE: Must be after all the parameters since the queue population diff --git a/src/guidellm/utils/images.py b/src/guidellm/utils/images.py index 5d73bc0..569fe75 100644 --- a/src/guidellm/utils/images.py +++ b/src/guidellm/utils/images.py @@ -27,7 +27,7 @@ class ImageDescriptor(Serializable): ) -def load_images(data: str) -> List[ImageDescriptor]: +def load_images(data: str, image_resolution: Optional[List[int]]) -> List[ImageDescriptor]: """ Load an HTML file from a path or URL @@ -56,6 +56,10 @@ def load_images(data: str) -> List[ImageDescriptor]: logger.debug("Loading image: {}", img_url) img_response = requests.get(img_url) img_response.raise_for_status() + image = Image.open(BytesIO(img_response.content)) + + if image_resolution is not None: + image = image.resize(image_resolution) # Load image into Pillow images.append( From 43f14d4febc467daf8ef2028ec10e40e9d4c5f37 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Wed, 4 Dec 2024 18:07:22 +0000 Subject: [PATCH 15/45] Ignore EOS --- src/guidellm/backend/openai.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index b5cbc12..8ae18c5 100644 --- a/src/guidellm/backend/openai.py +++ b/src/guidellm/backend/openai.py @@ -92,6 +92,7 @@ async def make_request( { "max_tokens": request.output_token_count, "stop": None, + "ignore_eos": True, } ) elif settings.openai.max_gen_tokens and settings.openai.max_gen_tokens > 0: From d9819e94e87c2855214eda586861886f34c1e61d Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Wed, 4 Dec 2024 18:47:05 +0000 Subject: [PATCH 16/45] Ignore EOS --- src/guidellm/backend/openai.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index 8ae18c5..6f420ad 100644 --- a/src/guidellm/backend/openai.py +++ b/src/guidellm/backend/openai.py @@ -92,7 +92,9 @@ async def make_request( { "max_tokens": request.output_token_count, "stop": None, - "ignore_eos": True, + "extra_body": { + "ignore_eos": True, + } } ) elif settings.openai.max_gen_tokens and settings.openai.max_gen_tokens > 0: From 503a56c6c4e4feb3d669cf9d6cc1ff095f175062 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 5 Dec 2024 22:41:44 +0000 Subject: [PATCH 17/45] Add image processing dependencies --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index b83abfd..caaeef0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,8 @@ dependencies = [ "base64", "io", "transformers", + "pillow", + "bs4", ] [project.optional-dependencies] From ffcb28ded1762b309fa46bebae94eec645dcb07f Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 5 Dec 2024 22:43:35 +0000 Subject: [PATCH 18/45] Fix support to images --- 
src/guidellm/core/request.py | 4 ++-- src/guidellm/request/emulated.py | 8 +++++--- src/guidellm/utils/images.py | 10 +++++++++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/guidellm/core/request.py b/src/guidellm/core/request.py index 8585979..cc82659 100644 --- a/src/guidellm/core/request.py +++ b/src/guidellm/core/request.py @@ -1,5 +1,5 @@ import uuid -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional from pydantic import Field @@ -42,7 +42,7 @@ def number_images(self) -> int: return len(self.images) @property - def image_resolution(self) -> Tuple[int]: + def image_resolution(self) -> List[int]: if self.images is None: return None else: diff --git a/src/guidellm/request/emulated.py b/src/guidellm/request/emulated.py index 8818ff9..43c3389 100644 --- a/src/guidellm/request/emulated.py +++ b/src/guidellm/request/emulated.py @@ -31,7 +31,8 @@ class EmulatedConfig: generated_tokens_min (Optional[int]): Minimum number of generated tokens. generated_tokens_max (Optional[int]): Maximum number of generated tokens. images (Optional[int]): Number of images. - image_resultion (Optional[List[int]]): Resolution of images. + width (Optional[int]): Width of images. + height (Optional[int]): Height of images. """ @staticmethod @@ -108,7 +109,8 @@ def create_config(config: Optional[Union[str, Path, Dict]]) -> "EmulatedConfig": generated_tokens_max: Optional[int] = None images: int = 0 - image_resolution = None + width: int = None + height: int = None def __post_init__(self): if self.images is not None and self.image_resultion is not None and self.images > 0: @@ -337,7 +339,7 @@ def __init__( settings.emulated_data.filter_end, ) if self._config.images > 0: - self._images = load_images(settings.emulated_data.image_source, self._config.image_resolution) + self._images = load_images(settings.emulated_data.image_source, [self._config.width, self._config.height]) self._rng = np.random.default_rng(random_seed) # NOTE: Must be after all the parameters since the queue population diff --git a/src/guidellm/utils/images.py b/src/guidellm/utils/images.py index 569fe75..72846d7 100644 --- a/src/guidellm/utils/images.py +++ b/src/guidellm/utils/images.py @@ -6,7 +6,7 @@ from bs4 import BeautifulSoup from loguru import logger from PIL import Image -from pydantic import ConfigDict, Field +from pydantic import ConfigDict, Field, computed_field from guidellm.config import settings from guidellm.core.serializable import Serializable @@ -26,6 +26,14 @@ class ImageDescriptor(Serializable): description="Image filename.", ) + @computed_field # type: ignore[misc] + @property + def image_resolution(self) -> List[int]: + if self.images is None: + return None + else: + return [im.size for im in self.images] + def load_images(data: str, image_resolution: Optional[List[int]]) -> List[ImageDescriptor]: """ From 6106a719c5b64f4f6a834879a3512432b45bb92f Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 5 Dec 2024 22:51:42 +0000 Subject: [PATCH 19/45] Fix serialization --- src/guidellm/core/request.py | 4 ++-- src/guidellm/utils/images.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/guidellm/core/request.py b/src/guidellm/core/request.py index cc82659..8ace3d1 100644 --- a/src/guidellm/core/request.py +++ b/src/guidellm/core/request.py @@ -1,5 +1,5 @@ import uuid -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple from pydantic import Field @@ -42,7 +42,7 @@ def 
number_images(self) -> int: return len(self.images) @property - def image_resolution(self) -> List[int]: + def image_resolution(self) -> List[Tuple[int]]: if self.images is None: return None else: diff --git a/src/guidellm/utils/images.py b/src/guidellm/utils/images.py index 72846d7..ed025db 100644 --- a/src/guidellm/utils/images.py +++ b/src/guidellm/utils/images.py @@ -1,5 +1,5 @@ from io import BytesIO -from typing import List, Optional +from typing import List, Optional, Tuple from urllib.parse import urljoin import requests @@ -28,11 +28,11 @@ class ImageDescriptor(Serializable): @computed_field # type: ignore[misc] @property - def image_resolution(self) -> List[int]: - if self.images is None: + def image_resolution(self) -> Tuple[int]: + if self.image is None: return None else: - return [im.size for im in self.images] + return self.image.size def load_images(data: str, image_resolution: Optional[List[int]]) -> List[ImageDescriptor]: From 81718204b74c649486e91c900b05a48b053944e7 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 5 Dec 2024 22:52:30 +0000 Subject: [PATCH 20/45] Fix image registration --- src/guidellm/request/emulated.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/guidellm/request/emulated.py b/src/guidellm/request/emulated.py index 43c3389..02f564a 100644 --- a/src/guidellm/request/emulated.py +++ b/src/guidellm/request/emulated.py @@ -112,10 +112,6 @@ def create_config(config: Optional[Union[str, Path, Dict]]) -> "EmulatedConfig": width: int = None height: int = None - def __post_init__(self): - if self.images is not None and self.image_resultion is not None and self.images > 0: - assert len(self.image_resolution) == 2 - @property def prompt_tokens_range(self) -> Tuple[int, int]: """ From e845510888c212c4395da2e40b5d58113c03ea97 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 5 Dec 2024 22:55:43 +0000 Subject: [PATCH 21/45] Fix pydantic format --- src/guidellm/core/request.py | 2 +- src/guidellm/utils/images.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/guidellm/core/request.py b/src/guidellm/core/request.py index 8ace3d1..06d0f37 100644 --- a/src/guidellm/core/request.py +++ b/src/guidellm/core/request.py @@ -42,7 +42,7 @@ def number_images(self) -> int: return len(self.images) @property - def image_resolution(self) -> List[Tuple[int]]: + def image_resolution(self) -> List[Tuple[int, int]]: if self.images is None: return None else: diff --git a/src/guidellm/utils/images.py b/src/guidellm/utils/images.py index ed025db..ed1b30a 100644 --- a/src/guidellm/utils/images.py +++ b/src/guidellm/utils/images.py @@ -28,7 +28,7 @@ class ImageDescriptor(Serializable): @computed_field # type: ignore[misc] @property - def image_resolution(self) -> Tuple[int]: + def image_resolution(self) -> Tuple[int, int]: if self.image is None: return None else: From d1ad0f89a53c4c8d09269e8df0ff613a2a18355a Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 5 Dec 2024 23:20:48 +0000 Subject: [PATCH 22/45] Use resized image --- src/guidellm/utils/images.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/guidellm/utils/images.py b/src/guidellm/utils/images.py index ed1b30a..fb66d43 100644 --- a/src/guidellm/utils/images.py +++ b/src/guidellm/utils/images.py @@ -73,7 +73,7 @@ def load_images(data: str, image_resolution: Optional[List[int]]) -> List[ImageD images.append( ImageDescriptor( url=img_url, - image=Image.open(BytesIO(img_response.content)), + image=image, ) ) From 
40e8e92d39f73f01e6c5bf8a069c00ee5ca4d221 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Fri, 6 Dec 2024 13:54:21 -0500 Subject: [PATCH 23/45] Update pyproject.toml --- pyproject.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index caaeef0..b83abfd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,8 +40,6 @@ dependencies = [ "base64", "io", "transformers", - "pillow", - "bs4", ] [project.optional-dependencies] From 0d8eb2f03aa61b9dd22a787b90ee3da9de35a8cf Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Fri, 6 Dec 2024 14:06:33 -0500 Subject: [PATCH 24/45] Update pyproject.toml --- pyproject.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b83abfd..6ab2c6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,9 +36,6 @@ dependencies = [ "pyyaml>=6.0.0", "requests", "rich", - "pillow", - "base64", - "io", "transformers", ] From 511d3cb56997543265607011fc4af468624c66d0 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Fri, 6 Dec 2024 14:07:04 -0500 Subject: [PATCH 25/45] Update .pre-commit-config.yaml --- .pre-commit-config.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6bcf150..2a085bb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,9 +27,6 @@ repos: pyyaml, requests, rich, - pillow, - base64, - io, transformers, # dev dependencies From bca2614c0da83fba8a12ab76453e40e464e44997 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Fri, 6 Dec 2024 21:54:52 +0000 Subject: [PATCH 26/45] Adds aiohttp backend --- src/guidellm/backend/__init__.py | 2 + src/guidellm/backend/aiohttp.py | 160 +++++++++++++++++++++++++++++++ src/guidellm/backend/base.py | 2 +- src/guidellm/config.py | 4 + 4 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 src/guidellm/backend/aiohttp.py diff --git a/src/guidellm/backend/__init__.py b/src/guidellm/backend/__init__.py index 875e319..1391018 100644 --- a/src/guidellm/backend/__init__.py +++ b/src/guidellm/backend/__init__.py @@ -1,5 +1,6 @@ from .base import Backend, BackendEngine, BackendEnginePublic, GenerativeResponse from .openai import OpenAIBackend +from .aiohttp import AiohttpBackend __all__ = [ "Backend", @@ -7,4 +8,5 @@ "BackendEnginePublic", "GenerativeResponse", "OpenAIBackend", + "AiohttpBackend" ] diff --git a/src/guidellm/backend/aiohttp.py b/src/guidellm/backend/aiohttp.py new file mode 100644 index 0000000..138f45a --- /dev/null +++ b/src/guidellm/backend/aiohttp.py @@ -0,0 +1,160 @@ +from typing import AsyncGenerator, Dict, List, Optional +from loguru import logger + +import aiohttp +import json + +from guidellm.backend.base import Backend, GenerativeResponse +from guidellm.config import settings +from guidellm.core import TextGenerationRequest + +__all__ = ["AiohttpBackend"] + +@Backend.register("aiohttp_server") +class AiohttpBackend(Backend): + """ + An aiohttp-based backend implementation for LLM requests. + + This class provides an interface to communicate with a server hosting + an LLM API using aiohttp for asynchronous requests. 
+ """ + + def __init__( + self, + openai_api_key: Optional[str] = None, + target: Optional[str] = None, + model: Optional[str] = None, + timeout: Optional[float] = None, + **request_args, + ): + self._request_args: Dict = request_args + self._api_key: str = openai_api_key or settings.aiohttp.api_key + + if not self._api_key: + err = ValueError( + "`GUIDELLM__AIOHTTP__API_KEY` environment variable or " + "--openai-api-key CLI parameter must be specified for the " + "aiohttp backend." + ) + logger.error("{}", err) + raise err + + base_url = target or settings.aiohttp.base_url + self._api_url = f"{base_url}/chat/completions" + + if not base_url: + err = ValueError( + "`GUIDELLM__AIOHTTP__BASE_URL` environment variable or " + "target parameter must be specified for the OpenAI backend." + ) + logger.error("{}", err) + raise err + + self._timeout = aiohttp.ClientTimeout(total=timeout or settings.request_timeout) + self._model = model + + super().__init__(type_="aiohttp_backend", target=base_url, model=self._model) + logger.info("aiohttp {} Backend listening on {}", self._model, base_url) + + async def make_request( + self, + request: TextGenerationRequest, + ) -> AsyncGenerator[GenerativeResponse, None]: + """ + Make a request to the aiohttp backend. + + Sends a prompt to the LLM server and streams the response tokens. + + :param request: The text generation request to submit. + :type request: TextGenerationRequest + :yield: A stream of GenerativeResponse objects. + :rtype: AsyncGenerator[GenerativeResponse, None] + """ + + async with aiohttp.ClientSession(timeout=self._timeout) as session: + logger.debug("Making request to aiohttp backend with prompt: {}", request.prompt) + + request_args = {} + if request.output_token_count is not None: + request_args.update( + { + "max_completion_tokens": request.output_token_count, + "stop": None, + "ignore_eos": True, + } + ) + elif settings.aiohttp.max_gen_tokens and settings.aiohttp.max_gen_tokens > 0: + request_args.update( + { + "max_tokens": settings.aiohttp.max_gen_tokens, + } + ) + + request_args.update(self._request_args) + + payload = { + "model": self._model, + "messages": [ + {"role": "user", "content": request.prompt}, + ], + "stream": True, + **request_args, + } + + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {self._api_key}", + } + + try: + async with session.post(url=self._api_url, json=payload, headers=headers) as response: + if response.status != 200: + error_message = await response.text() + logger.error("Request failed: {} - {}", response.status, error_message) + raise Exception(f"Failed to generate response: {error_message}") + + token_count = 0 + async for chunk_bytes in response.content: + chunk_bytes = chunk_bytes.strip() + if not chunk_bytes: + continue + + chunk = chunk_bytes.decode("utf-8").removeprefix("data: ") + if chunk == "[DONE]": + # Final response + yield GenerativeResponse( + type_="final", + prompt=request.prompt, + output_token_count=token_count, + prompt_token_count=request.prompt_token_count, + ) + else: + # Intermediate token response + token_count += 1 + data = json.loads(chunk) + delta = data["choices"][0]["delta"] + token = delta["content"] + yield GenerativeResponse( + type_="token_iter", + add_token=token, + prompt=request.prompt, + output_token_count=token_count, + prompt_token_count=request.prompt_token_count, + ) + except Exception as e: + logger.error("Error while making request: {}", e) + raise + + def available_models(self) -> List[str]: + """ + Retrieve a list of available 
models from the server. + """ + # This could include an API call to `self._api_url/models` if the server supports it. + logger.warning("Fetching available models is not implemented for aiohttp backend.") + return [] + + def validate_connection(self): + """ + Validate the connection to the backend server. + """ + logger.info("Connection validation is not explicitly implemented for aiohttp backend.") diff --git a/src/guidellm/backend/base.py b/src/guidellm/backend/base.py index d71c5f6..a165859 100644 --- a/src/guidellm/backend/base.py +++ b/src/guidellm/backend/base.py @@ -15,7 +15,7 @@ __all__ = ["Backend", "BackendEngine", "BackendEnginePublic", "GenerativeResponse"] -BackendEnginePublic = Literal["openai_server"] +BackendEnginePublic = Literal["openai_server", "aiohttp_server"] BackendEngine = Union[BackendEnginePublic, Literal["test"]] diff --git a/src/guidellm/config.py b/src/guidellm/config.py index c3d950e..0594709 100644 --- a/src/guidellm/config.py +++ b/src/guidellm/config.py @@ -108,6 +108,9 @@ class OpenAISettings(BaseModel): max_gen_tokens: int = 4096 +class AiohttpSettings(OpenAISettings): + pass + class ReportGenerationSettings(BaseModel): """ Report generation settings for the application @@ -152,6 +155,7 @@ class Settings(BaseSettings): # Request settings openai: OpenAISettings = OpenAISettings() + aiohttp: AiohttpSettings = AiohttpSettings() # Report settings report_generation: ReportGenerationSettings = ReportGenerationSettings() From 72db6a4e87093fcf9ca56cf2186af6fa69842250 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Wed, 11 Dec 2024 04:22:39 +0000 Subject: [PATCH 27/45] Add support for aiohttp backend --- src/guidellm/backend/aiohttp.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/guidellm/backend/aiohttp.py b/src/guidellm/backend/aiohttp.py index 138f45a..fbbd971 100644 --- a/src/guidellm/backend/aiohttp.py +++ b/src/guidellm/backend/aiohttp.py @@ -1,3 +1,5 @@ +import base64 +import io from typing import AsyncGenerator, Dict, List, Optional from loguru import logger @@ -92,11 +94,11 @@ async def make_request( request_args.update(self._request_args) + messages = self._build_messages(request) + payload = { "model": self._model, - "messages": [ - {"role": "user", "content": request.prompt}, - ], + "messages": messages, "stream": True, **request_args, } @@ -158,3 +160,21 @@ def validate_connection(self): Validate the connection to the backend server. 
""" logger.info("Connection validation is not explicitly implemented for aiohttp backend.") + + def _build_messages(self, request: TextGenerationRequest) -> Dict: + if request.number_images == 0: + messages = [{"role": "user", "content": request.prompt}] + else: + content = [] + for image in request.images: + stream = io.BytesIO() + im_format = image.image.format or "PNG" + image.image.save(stream, format=im_format) + im_b64 = base64.b64encode(stream.getvalue()).decode("utf-8") + image_url = {"url": f"data:image/{im_format.lower()};base64,{im_b64}"} + content.append({"type": "image_url", "image_url": image_url}) + + content.append({"type": "text", "text": request.prompt}) + messages = [{"role": "user", "content": content}] + + return messages From ba4187dea12888c7e74cfe0acfb6049fa460f4e7 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Wed, 4 Sep 2024 22:09:59 +0000 Subject: [PATCH 28/45] Add mean and percentile info as computed_field properties such that they become serializable --- src/guidellm/core/report.py | 26 ++++------- src/guidellm/core/result.py | 92 ++++++++++++++++++++++++++++++++++++- 2 files changed, 99 insertions(+), 19 deletions(-) diff --git a/src/guidellm/core/report.py b/src/guidellm/core/report.py index b6791e4..c48eed5 100644 --- a/src/guidellm/core/report.py +++ b/src/guidellm/core/report.py @@ -147,19 +147,15 @@ def _create_benchmark_report_data_tokens_summary( for benchmark in report.benchmarks_sorted: table.add_row( _benchmark_rate_id(benchmark), - f"{benchmark.prompt_token_distribution.mean:.2f}", + f"{benchmark.prompt_token:.2f}", ", ".join( f"{percentile:.1f}" - for percentile in benchmark.prompt_token_distribution.percentiles( - [1, 5, 50, 95, 99] - ) + for percentile in benchmark.prompt_token_percentiles ), - f"{benchmark.output_token_distribution.mean:.2f}", + f"{benchmark.output_token:.2f}", ", ".join( f"{percentile:.1f}" - for percentile in benchmark.output_token_distribution.percentiles( - [1, 5, 50, 95, 99] - ) + for percentile in benchmark.output_token_percentiles ), ) logger.debug("Created data tokens summary table for the report.") @@ -181,7 +177,7 @@ def _create_benchmark_report_dist_perf_summary( "Benchmark", "Request Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (sec)", "Time to First Token [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)", - "Inter Token Latency [1%, 5%, 10%, 50%, 90% 95%, 99%] (ms)", + "Inter Token Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)", title="[magenta]Performance Stats by Benchmark[/magenta]", title_style="bold", title_justify="left", @@ -193,21 +189,15 @@ def _create_benchmark_report_dist_perf_summary( _benchmark_rate_id(benchmark), ", ".join( f"{percentile:.2f}" - for percentile in benchmark.request_latency_distribution.percentiles( - [1, 5, 10, 50, 90, 95, 99] - ) + for percentile in benchmark.request_latency_percentiles ), ", ".join( f"{percentile * 1000:.1f}" - for percentile in benchmark.ttft_distribution.percentiles( - [1, 5, 10, 50, 90, 95, 99] - ) + for percentile in benchmark.time_to_first_token_percentiles ), ", ".join( f"{percentile * 1000:.1f}" - for percentile in benchmark.itl_distribution.percentiles( - [1, 5, 10, 50, 90, 95, 99] - ) + for percentile in benchmark.inter_token_latency_percentiles ), ) logger.debug("Created distribution performance summary table for the report.") diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py index f218784..5fd29a8 100644 --- a/src/guidellm/core/result.py +++ b/src/guidellm/core/result.py @@ -2,7 +2,7 @@ from typing import Any, Dict, List, Literal, Optional, 
Union from loguru import logger -from pydantic import Field +from pydantic import Field, computed_field from guidellm.core.distribution import Distribution from guidellm.core.request import TextGenerationRequest @@ -221,6 +221,7 @@ def __iter__(self): """ return iter(self.results) + @computed_field @property def request_count(self) -> int: """ @@ -231,6 +232,7 @@ def request_count(self) -> int: """ return len(self.results) + @computed_field @property def error_count(self) -> int: """ @@ -241,6 +243,7 @@ def error_count(self) -> int: """ return len(self.errors) + @computed_field @property def total_count(self) -> int: """ @@ -251,6 +254,7 @@ def total_count(self) -> int: """ return self.request_count + self.error_count + @computed_field @property def start_time(self) -> Optional[float]: """ @@ -264,6 +268,7 @@ def start_time(self) -> Optional[float]: return self.results[0].start_time + @computed_field @property def end_time(self) -> Optional[float]: """ @@ -277,6 +282,7 @@ def end_time(self) -> Optional[float]: return self.results[-1].end_time + @computed_field @property def duration(self) -> float: """ @@ -290,6 +296,7 @@ def duration(self) -> float: return self.end_time - self.start_time + @computed_field @property def completed_request_rate(self) -> float: """ @@ -303,6 +310,7 @@ def completed_request_rate(self) -> float: return len(self.results) / self.duration + @computed_field @property def request_latency(self) -> float: """ @@ -332,6 +340,19 @@ def request_latency_distribution(self) -> Distribution: ] ) + @computed_field + @property + def request_latency_percentiles(self) -> List[float]: + """ + Get standard percentiles of request latency in seconds. + + :return: List of percentile request latency in seconds + :rtype: List[float] + """ + return self.request_latency_distribution.percentiles([1, 5, 10, 50, 90, 95, 99]) + + + @computed_field @property def time_to_first_token(self) -> float: """ @@ -360,7 +381,19 @@ def ttft_distribution(self) -> Distribution: if result.first_token_time is not None ] ) + + @computed_field + @property + def time_to_first_token_percentiles(self) -> List[float]: + """ + Get standard percentiles for time taken to decode the first token in milliseconds. + + :return: List of percentile time taken to decode the first token in milliseconds. + :rtype: List[float] + """ + return self.ttft_distribution.percentiles([1, 5, 10, 50, 90, 95, 99]) + @computed_field @property def inter_token_latency(self) -> float: """ @@ -387,7 +420,19 @@ def itl_distribution(self) -> Distribution: decode for result in self.results for decode in result.decode_times.data ] ) + + @computed_field + @property + def inter_token_latency_percentiles(self) -> List[float]: + """ + Get standard percentiles for the time between tokens in milliseconds. + :return: List of percentiles for the average time between tokens. + :rtype: List[float] + """ + return self.itl_distribution.percentiles([1, 5, 10, 50, 90, 95, 99]) + + @computed_field @property def output_token_throughput(self) -> float: """ @@ -403,6 +448,17 @@ def output_token_throughput(self) -> float: return total_tokens / self.duration + @computed_field + @property + def prompt_token(self) -> float: + """ + Get the average number of prompt tokens. + + :return: The average number of prompt tokens. 
+ :rtype: float + """ + return self.prompt_token_distribution.mean + @property def prompt_token_distribution(self) -> Distribution: """ @@ -413,6 +469,28 @@ def prompt_token_distribution(self) -> Distribution: """ return Distribution(data=[result.prompt_token_count for result in self.results]) + @computed_field + @property + def prompt_token_percentiles(self) -> List[float]: + """ + Get standard percentiles for number of prompt tokens. + + :return: List of percentiles of number of prompt tokens. + :rtype: List[float] + """ + return self.prompt_token_distribution.percentiles([1, 5, 50, 95, 99]) + + @computed_field + @property + def output_token(self) -> float: + """ + Get the average number of output tokens. + + :return: The average number of output tokens. + :rtype: float + """ + return self.output_token_distribution.mean + @property def output_token_distribution(self) -> Distribution: """ @@ -423,6 +501,18 @@ def output_token_distribution(self) -> Distribution: """ return Distribution(data=[result.output_token_count for result in self.results]) + @computed_field + @property + def output_token_percentiles(self) -> List[float]: + """ + Get standard percentiles for number of output tokens. + + :return: List of percentiles of number of output tokens. + :rtype: List[float] + """ + return self.output_token_distribution.percentiles([1, 5, 50, 95, 99]) + + @computed_field @property def overloaded(self) -> bool: if ( From 1566348a6b9f91762d8fb6f94b621416ad166b1f Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 5 Sep 2024 01:19:42 +0000 Subject: [PATCH 29/45] quality fixes --- src/guidellm/core/result.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py index 5fd29a8..90906b7 100644 --- a/src/guidellm/core/result.py +++ b/src/guidellm/core/result.py @@ -381,7 +381,7 @@ def ttft_distribution(self) -> Distribution: if result.first_token_time is not None ] ) - + @computed_field @property def time_to_first_token_percentiles(self) -> List[float]: @@ -390,7 +390,7 @@ def time_to_first_token_percentiles(self) -> List[float]: :return: List of percentile time taken to decode the first token in milliseconds. :rtype: List[float] - """ + """ return self.ttft_distribution.percentiles([1, 5, 10, 50, 90, 95, 99]) @computed_field @@ -420,7 +420,7 @@ def itl_distribution(self) -> Distribution: decode for result in self.results for decode in result.decode_times.data ] ) - + @computed_field @property def inter_token_latency_percentiles(self) -> List[float]: From e37166f68b029cafc56b097b444e5324971dd3d7 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 5 Sep 2024 01:23:25 +0000 Subject: [PATCH 30/45] quality fix --- src/guidellm/core/result.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py index 90906b7..95a4230 100644 --- a/src/guidellm/core/result.py +++ b/src/guidellm/core/result.py @@ -386,9 +386,11 @@ def ttft_distribution(self) -> Distribution: @property def time_to_first_token_percentiles(self) -> List[float]: """ - Get standard percentiles for time taken to decode the first token in milliseconds. + Get standard percentiles for time taken to decode the first token + in milliseconds. - :return: List of percentile time taken to decode the first token in milliseconds. + :return: List of percentile time taken to decode the first token + in milliseconds. 
:rtype: List[float] """ return self.ttft_distribution.percentiles([1, 5, 10, 50, 90, 95, 99]) From 9039845f9ffa1c0bb84855704845883a43d00c61 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Fri, 6 Sep 2024 14:28:54 +0000 Subject: [PATCH 31/45] Quality fixes --- src/guidellm/core/result.py | 38 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py index 95a4230..aebd176 100644 --- a/src/guidellm/core/result.py +++ b/src/guidellm/core/result.py @@ -221,7 +221,7 @@ def __iter__(self): """ return iter(self.results) - @computed_field + @computed_field # type: ignore[misc] @property def request_count(self) -> int: """ @@ -232,7 +232,7 @@ def request_count(self) -> int: """ return len(self.results) - @computed_field + @computed_field # type: ignore[misc] @property def error_count(self) -> int: """ @@ -243,7 +243,7 @@ def error_count(self) -> int: """ return len(self.errors) - @computed_field + @computed_field # type: ignore[misc] @property def total_count(self) -> int: """ @@ -254,7 +254,7 @@ def total_count(self) -> int: """ return self.request_count + self.error_count - @computed_field + @computed_field # type: ignore[misc] @property def start_time(self) -> Optional[float]: """ @@ -268,7 +268,7 @@ def start_time(self) -> Optional[float]: return self.results[0].start_time - @computed_field + @computed_field # type: ignore[misc] @property def end_time(self) -> Optional[float]: """ @@ -282,7 +282,7 @@ def end_time(self) -> Optional[float]: return self.results[-1].end_time - @computed_field + @computed_field # type: ignore[misc] @property def duration(self) -> float: """ @@ -296,7 +296,7 @@ def duration(self) -> float: return self.end_time - self.start_time - @computed_field + @computed_field # type: ignore[misc] @property def completed_request_rate(self) -> float: """ @@ -310,7 +310,7 @@ def completed_request_rate(self) -> float: return len(self.results) / self.duration - @computed_field + @computed_field # type: ignore[misc] @property def request_latency(self) -> float: """ @@ -340,7 +340,7 @@ def request_latency_distribution(self) -> Distribution: ] ) - @computed_field + @computed_field # type: ignore[misc] @property def request_latency_percentiles(self) -> List[float]: """ @@ -352,7 +352,7 @@ def request_latency_percentiles(self) -> List[float]: return self.request_latency_distribution.percentiles([1, 5, 10, 50, 90, 95, 99]) - @computed_field + @computed_field # type: ignore[misc] @property def time_to_first_token(self) -> float: """ @@ -382,7 +382,7 @@ def ttft_distribution(self) -> Distribution: ] ) - @computed_field + @computed_field # type: ignore[misc] @property def time_to_first_token_percentiles(self) -> List[float]: """ @@ -395,7 +395,7 @@ def time_to_first_token_percentiles(self) -> List[float]: """ return self.ttft_distribution.percentiles([1, 5, 10, 50, 90, 95, 99]) - @computed_field + @computed_field # type: ignore[misc] @property def inter_token_latency(self) -> float: """ @@ -423,7 +423,7 @@ def itl_distribution(self) -> Distribution: ] ) - @computed_field + @computed_field # type: ignore[misc] @property def inter_token_latency_percentiles(self) -> List[float]: """ @@ -434,7 +434,7 @@ def inter_token_latency_percentiles(self) -> List[float]: """ return self.itl_distribution.percentiles([1, 5, 10, 50, 90, 95, 99]) - @computed_field + @computed_field # type: ignore[misc] @property def output_token_throughput(self) -> float: """ @@ -450,7 +450,7 @@ def 
output_token_throughput(self) -> float: return total_tokens / self.duration - @computed_field + @computed_field # type: ignore[misc] @property def prompt_token(self) -> float: """ @@ -471,7 +471,7 @@ def prompt_token_distribution(self) -> Distribution: """ return Distribution(data=[result.prompt_token_count for result in self.results]) - @computed_field + @computed_field # type: ignore[misc] @property def prompt_token_percentiles(self) -> List[float]: """ @@ -482,7 +482,7 @@ def prompt_token_percentiles(self) -> List[float]: """ return self.prompt_token_distribution.percentiles([1, 5, 50, 95, 99]) - @computed_field + @computed_field # type: ignore[misc] @property def output_token(self) -> float: """ @@ -503,7 +503,7 @@ def output_token_distribution(self) -> Distribution: """ return Distribution(data=[result.output_token_count for result in self.results]) - @computed_field + @computed_field # type: ignore[misc] @property def output_token_percentiles(self) -> List[float]: """ @@ -514,7 +514,7 @@ def output_token_percentiles(self) -> List[float]: """ return self.output_token_distribution.percentiles([1, 5, 50, 95, 99]) - @computed_field + @computed_field # type: ignore[misc] @property def overloaded(self) -> bool: if ( From 036917e543cdbd61bde487121ed26a2b000ef8b7 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Wed, 4 Dec 2024 18:07:22 +0000 Subject: [PATCH 32/45] Ignore EOS --- src/guidellm/backend/openai.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index 90d2791..4ea1396 100644 --- a/src/guidellm/backend/openai.py +++ b/src/guidellm/backend/openai.py @@ -94,6 +94,7 @@ async def make_request( { "max_tokens": request.output_token_count, "stop": None, + "ignore_eos": True, } ) elif settings.openai.max_gen_tokens and settings.openai.max_gen_tokens > 0: From 6e6691cb319dbd49866061357251fa410a418589 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Wed, 4 Dec 2024 18:47:05 +0000 Subject: [PATCH 33/45] Ignore EOS --- src/guidellm/backend/openai.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index 4ea1396..9843fc1 100644 --- a/src/guidellm/backend/openai.py +++ b/src/guidellm/backend/openai.py @@ -94,7 +94,9 @@ async def make_request( { "max_tokens": request.output_token_count, "stop": None, - "ignore_eos": True, + "extra_body": { + "ignore_eos": True, + } } ) elif settings.openai.max_gen_tokens and settings.openai.max_gen_tokens > 0: From ff742f16df70952be2f79d24b19b95fbefcc671c Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 5 Dec 2024 22:41:44 +0000 Subject: [PATCH 34/45] Add image processing dependencies --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index b83abfd..caaeef0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,8 @@ dependencies = [ "base64", "io", "transformers", + "pillow", + "bs4", ] [project.optional-dependencies] From 2b1706f452598b489a2dfdccca532d6ee9be0eff Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 5 Dec 2024 22:43:35 +0000 Subject: [PATCH 35/45] Fix support to images --- src/guidellm/core/request.py | 4 ++-- src/guidellm/request/emulated.py | 8 +++++--- src/guidellm/utils/images.py | 10 +++++++++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/guidellm/core/request.py b/src/guidellm/core/request.py index 8585979..cc82659 100644 --- a/src/guidellm/core/request.py +++ 
b/src/guidellm/core/request.py @@ -1,5 +1,5 @@ import uuid -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional from pydantic import Field @@ -42,7 +42,7 @@ def number_images(self) -> int: return len(self.images) @property - def image_resolution(self) -> Tuple[int]: + def image_resolution(self) -> List[int]: if self.images is None: return None else: diff --git a/src/guidellm/request/emulated.py b/src/guidellm/request/emulated.py index 8818ff9..43c3389 100644 --- a/src/guidellm/request/emulated.py +++ b/src/guidellm/request/emulated.py @@ -31,7 +31,8 @@ class EmulatedConfig: generated_tokens_min (Optional[int]): Minimum number of generated tokens. generated_tokens_max (Optional[int]): Maximum number of generated tokens. images (Optional[int]): Number of images. - image_resultion (Optional[List[int]]): Resolution of images. + width (Optional[int]): Width of images. + height (Optional[int]): Height of images. """ @staticmethod @@ -108,7 +109,8 @@ def create_config(config: Optional[Union[str, Path, Dict]]) -> "EmulatedConfig": generated_tokens_max: Optional[int] = None images: int = 0 - image_resolution = None + width: int = None + height: int = None def __post_init__(self): if self.images is not None and self.image_resultion is not None and self.images > 0: @@ -337,7 +339,7 @@ def __init__( settings.emulated_data.filter_end, ) if self._config.images > 0: - self._images = load_images(settings.emulated_data.image_source, self._config.image_resolution) + self._images = load_images(settings.emulated_data.image_source, [self._config.width, self._config.height]) self._rng = np.random.default_rng(random_seed) # NOTE: Must be after all the parameters since the queue population diff --git a/src/guidellm/utils/images.py b/src/guidellm/utils/images.py index 569fe75..72846d7 100644 --- a/src/guidellm/utils/images.py +++ b/src/guidellm/utils/images.py @@ -6,7 +6,7 @@ from bs4 import BeautifulSoup from loguru import logger from PIL import Image -from pydantic import ConfigDict, Field +from pydantic import ConfigDict, Field, computed_field from guidellm.config import settings from guidellm.core.serializable import Serializable @@ -26,6 +26,14 @@ class ImageDescriptor(Serializable): description="Image filename.", ) + @computed_field # type: ignore[misc] + @property + def image_resolution(self) -> List[int]: + if self.images is None: + return None + else: + return [im.size for im in self.images] + def load_images(data: str, image_resolution: Optional[List[int]]) -> List[ImageDescriptor]: """ From 01610325c976dc13082c693cf4f3bd2137d64120 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 5 Dec 2024 22:51:42 +0000 Subject: [PATCH 36/45] Fix serialization --- src/guidellm/core/request.py | 4 ++-- src/guidellm/utils/images.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/guidellm/core/request.py b/src/guidellm/core/request.py index cc82659..8ace3d1 100644 --- a/src/guidellm/core/request.py +++ b/src/guidellm/core/request.py @@ -1,5 +1,5 @@ import uuid -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple from pydantic import Field @@ -42,7 +42,7 @@ def number_images(self) -> int: return len(self.images) @property - def image_resolution(self) -> List[int]: + def image_resolution(self) -> List[Tuple[int]]: if self.images is None: return None else: diff --git a/src/guidellm/utils/images.py b/src/guidellm/utils/images.py index 72846d7..ed025db 100644 --- a/src/guidellm/utils/images.py 
+++ b/src/guidellm/utils/images.py @@ -1,5 +1,5 @@ from io import BytesIO -from typing import List, Optional +from typing import List, Optional, Tuple from urllib.parse import urljoin import requests @@ -28,11 +28,11 @@ class ImageDescriptor(Serializable): @computed_field # type: ignore[misc] @property - def image_resolution(self) -> List[int]: - if self.images is None: + def image_resolution(self) -> Tuple[int]: + if self.image is None: return None else: - return [im.size for im in self.images] + return self.image.size def load_images(data: str, image_resolution: Optional[List[int]]) -> List[ImageDescriptor]: From 35379d369af504498a9fe55fd23be5b24ea2a71c Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 5 Dec 2024 22:52:30 +0000 Subject: [PATCH 37/45] Fix image registration --- src/guidellm/request/emulated.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/guidellm/request/emulated.py b/src/guidellm/request/emulated.py index 43c3389..02f564a 100644 --- a/src/guidellm/request/emulated.py +++ b/src/guidellm/request/emulated.py @@ -112,10 +112,6 @@ def create_config(config: Optional[Union[str, Path, Dict]]) -> "EmulatedConfig": width: int = None height: int = None - def __post_init__(self): - if self.images is not None and self.image_resultion is not None and self.images > 0: - assert len(self.image_resolution) == 2 - @property def prompt_tokens_range(self) -> Tuple[int, int]: """ From d47887cebfe8564c2842a8c641be3153cac5729f Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 5 Dec 2024 22:55:43 +0000 Subject: [PATCH 38/45] Fix pydantic format --- src/guidellm/core/request.py | 2 +- src/guidellm/utils/images.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/guidellm/core/request.py b/src/guidellm/core/request.py index 8ace3d1..06d0f37 100644 --- a/src/guidellm/core/request.py +++ b/src/guidellm/core/request.py @@ -42,7 +42,7 @@ def number_images(self) -> int: return len(self.images) @property - def image_resolution(self) -> List[Tuple[int]]: + def image_resolution(self) -> List[Tuple[int, int]]: if self.images is None: return None else: diff --git a/src/guidellm/utils/images.py b/src/guidellm/utils/images.py index ed025db..ed1b30a 100644 --- a/src/guidellm/utils/images.py +++ b/src/guidellm/utils/images.py @@ -28,7 +28,7 @@ class ImageDescriptor(Serializable): @computed_field # type: ignore[misc] @property - def image_resolution(self) -> Tuple[int]: + def image_resolution(self) -> Tuple[int, int]: if self.image is None: return None else: From 33c13ecb93947e417051e252872c1725d773b9e4 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 5 Dec 2024 23:20:48 +0000 Subject: [PATCH 39/45] Use resized image --- src/guidellm/utils/images.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/guidellm/utils/images.py b/src/guidellm/utils/images.py index ed1b30a..fb66d43 100644 --- a/src/guidellm/utils/images.py +++ b/src/guidellm/utils/images.py @@ -73,7 +73,7 @@ def load_images(data: str, image_resolution: Optional[List[int]]) -> List[ImageD images.append( ImageDescriptor( url=img_url, - image=Image.open(BytesIO(img_response.content)), + image=image, ) ) From cb324d8b38b1793beb56f460670932f44d133146 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Fri, 6 Dec 2024 13:54:21 -0500 Subject: [PATCH 40/45] Update pyproject.toml --- pyproject.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index caaeef0..b83abfd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,8 +40,6 @@ 
dependencies = [ "base64", "io", "transformers", - "pillow", - "bs4", ] [project.optional-dependencies] From 4d3bc8dfb51314b90790390865324c023e8c96f5 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Fri, 6 Dec 2024 14:06:33 -0500 Subject: [PATCH 41/45] Update pyproject.toml --- pyproject.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b83abfd..6ab2c6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,9 +36,6 @@ dependencies = [ "pyyaml>=6.0.0", "requests", "rich", - "pillow", - "base64", - "io", "transformers", ] From 30e874e0a0a9aa0ad217fb8c0f77ca433b82ecc4 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Fri, 6 Dec 2024 14:07:04 -0500 Subject: [PATCH 42/45] Update .pre-commit-config.yaml --- .pre-commit-config.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6bcf150..2a085bb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,9 +27,6 @@ repos: pyyaml, requests, rich, - pillow, - base64, - io, transformers, # dev dependencies From 4426822db7ebb5fac1ed4e8e3237ea2bd79e507d Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Fri, 6 Dec 2024 21:54:52 +0000 Subject: [PATCH 43/45] Adds aiohttp backend --- src/guidellm/backend/__init__.py | 2 + src/guidellm/backend/aiohttp.py | 160 +++++++++++++++++++++++++++++++ src/guidellm/backend/base.py | 2 +- src/guidellm/config.py | 4 + 4 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 src/guidellm/backend/aiohttp.py diff --git a/src/guidellm/backend/__init__.py b/src/guidellm/backend/__init__.py index 875e319..1391018 100644 --- a/src/guidellm/backend/__init__.py +++ b/src/guidellm/backend/__init__.py @@ -1,5 +1,6 @@ from .base import Backend, BackendEngine, BackendEnginePublic, GenerativeResponse from .openai import OpenAIBackend +from .aiohttp import AiohttpBackend __all__ = [ "Backend", @@ -7,4 +8,5 @@ "BackendEnginePublic", "GenerativeResponse", "OpenAIBackend", + "AiohttpBackend" ] diff --git a/src/guidellm/backend/aiohttp.py b/src/guidellm/backend/aiohttp.py new file mode 100644 index 0000000..138f45a --- /dev/null +++ b/src/guidellm/backend/aiohttp.py @@ -0,0 +1,160 @@ +from typing import AsyncGenerator, Dict, List, Optional +from loguru import logger + +import aiohttp +import json + +from guidellm.backend.base import Backend, GenerativeResponse +from guidellm.config import settings +from guidellm.core import TextGenerationRequest + +__all__ = ["AiohttpBackend"] + +@Backend.register("aiohttp_server") +class AiohttpBackend(Backend): + """ + An aiohttp-based backend implementation for LLM requests. + + This class provides an interface to communicate with a server hosting + an LLM API using aiohttp for asynchronous requests. + """ + + def __init__( + self, + openai_api_key: Optional[str] = None, + target: Optional[str] = None, + model: Optional[str] = None, + timeout: Optional[float] = None, + **request_args, + ): + self._request_args: Dict = request_args + self._api_key: str = openai_api_key or settings.aiohttp.api_key + + if not self._api_key: + err = ValueError( + "`GUIDELLM__AIOHTTP__API_KEY` environment variable or " + "--openai-api-key CLI parameter must be specified for the " + "aiohttp backend." 
+ ) + logger.error("{}", err) + raise err + + base_url = target or settings.aiohttp.base_url + self._api_url = f"{base_url}/chat/completions" + + if not base_url: + err = ValueError( + "`GUIDELLM__AIOHTTP__BASE_URL` environment variable or " + "target parameter must be specified for the OpenAI backend." + ) + logger.error("{}", err) + raise err + + self._timeout = aiohttp.ClientTimeout(total=timeout or settings.request_timeout) + self._model = model + + super().__init__(type_="aiohttp_backend", target=base_url, model=self._model) + logger.info("aiohttp {} Backend listening on {}", self._model, base_url) + + async def make_request( + self, + request: TextGenerationRequest, + ) -> AsyncGenerator[GenerativeResponse, None]: + """ + Make a request to the aiohttp backend. + + Sends a prompt to the LLM server and streams the response tokens. + + :param request: The text generation request to submit. + :type request: TextGenerationRequest + :yield: A stream of GenerativeResponse objects. + :rtype: AsyncGenerator[GenerativeResponse, None] + """ + + async with aiohttp.ClientSession(timeout=self._timeout) as session: + logger.debug("Making request to aiohttp backend with prompt: {}", request.prompt) + + request_args = {} + if request.output_token_count is not None: + request_args.update( + { + "max_completion_tokens": request.output_token_count, + "stop": None, + "ignore_eos": True, + } + ) + elif settings.aiohttp.max_gen_tokens and settings.aiohttp.max_gen_tokens > 0: + request_args.update( + { + "max_tokens": settings.aiohttp.max_gen_tokens, + } + ) + + request_args.update(self._request_args) + + payload = { + "model": self._model, + "messages": [ + {"role": "user", "content": request.prompt}, + ], + "stream": True, + **request_args, + } + + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {self._api_key}", + } + + try: + async with session.post(url=self._api_url, json=payload, headers=headers) as response: + if response.status != 200: + error_message = await response.text() + logger.error("Request failed: {} - {}", response.status, error_message) + raise Exception(f"Failed to generate response: {error_message}") + + token_count = 0 + async for chunk_bytes in response.content: + chunk_bytes = chunk_bytes.strip() + if not chunk_bytes: + continue + + chunk = chunk_bytes.decode("utf-8").removeprefix("data: ") + if chunk == "[DONE]": + # Final response + yield GenerativeResponse( + type_="final", + prompt=request.prompt, + output_token_count=token_count, + prompt_token_count=request.prompt_token_count, + ) + else: + # Intermediate token response + token_count += 1 + data = json.loads(chunk) + delta = data["choices"][0]["delta"] + token = delta["content"] + yield GenerativeResponse( + type_="token_iter", + add_token=token, + prompt=request.prompt, + output_token_count=token_count, + prompt_token_count=request.prompt_token_count, + ) + except Exception as e: + logger.error("Error while making request: {}", e) + raise + + def available_models(self) -> List[str]: + """ + Retrieve a list of available models from the server. + """ + # This could include an API call to `self._api_url/models` if the server supports it. + logger.warning("Fetching available models is not implemented for aiohttp backend.") + return [] + + def validate_connection(self): + """ + Validate the connection to the backend server. 
+        """
+        logger.info("Connection validation is not explicitly implemented for aiohttp backend.")
diff --git a/src/guidellm/backend/base.py b/src/guidellm/backend/base.py
index d71c5f6..a165859 100644
--- a/src/guidellm/backend/base.py
+++ b/src/guidellm/backend/base.py
@@ -15,7 +15,7 @@
 __all__ = ["Backend", "BackendEngine", "BackendEnginePublic", "GenerativeResponse"]
 
-BackendEnginePublic = Literal["openai_server"]
+BackendEnginePublic = Literal["openai_server", "aiohttp_server"]
 BackendEngine = Union[BackendEnginePublic, Literal["test"]]
 
diff --git a/src/guidellm/config.py b/src/guidellm/config.py
index df750ea..a19a624 100644
--- a/src/guidellm/config.py
+++ b/src/guidellm/config.py
@@ -109,6 +109,9 @@ class OpenAISettings(BaseModel):
     max_gen_tokens: int = 4096
 
+class AiohttpSettings(OpenAISettings):
+    pass
+
 class ReportGenerationSettings(BaseModel):
     """
     Report generation settings for the application
@@ -153,6 +156,7 @@ class Settings(BaseSettings):
 
     # Request settings
     openai: OpenAISettings = OpenAISettings()
+    aiohttp: AiohttpSettings = AiohttpSettings()
 
     # Report settings
     report_generation: ReportGenerationSettings = ReportGenerationSettings()

From b7845157d102a44f261129d08e05d580374d9ce3 Mon Sep 17 00:00:00 2001
From: Alexandre Marques
Date: Wed, 11 Dec 2024 04:22:39 +0000
Subject: [PATCH 44/45] Add support for aiohttp backend

---
 src/guidellm/backend/aiohttp.py | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/src/guidellm/backend/aiohttp.py b/src/guidellm/backend/aiohttp.py
index 138f45a..fbbd971 100644
--- a/src/guidellm/backend/aiohttp.py
+++ b/src/guidellm/backend/aiohttp.py
@@ -1,3 +1,5 @@
+import base64
+import io
 from typing import AsyncGenerator, Dict, List, Optional
 from loguru import logger
 
@@ -92,11 +94,11 @@ async def make_request(
 
             request_args.update(self._request_args)
 
+            messages = self._build_messages(request)
+
             payload = {
                 "model": self._model,
-                "messages": [
-                    {"role": "user", "content": request.prompt},
-                ],
+                "messages": messages,
                 "stream": True,
                 **request_args,
             }
@@ -158,3 +160,21 @@ def validate_connection(self):
         Validate the connection to the backend server.
         """
         logger.info("Connection validation is not explicitly implemented for aiohttp backend.")
+
+    def _build_messages(self, request: TextGenerationRequest) -> Dict:
+        if request.number_images == 0:
+            messages = [{"role": "user", "content": request.prompt}]
+        else:
+            content = []
+            for image in request.images:
+                stream = io.BytesIO()
+                im_format = image.image.format or "PNG"
+                image.image.save(stream, format=im_format)
+                im_b64 = base64.b64encode(stream.getvalue()).decode("utf-8")
+                image_url = {"url": f"data:image/{im_format.lower()};base64,{im_b64}"}
+                content.append({"type": "image_url", "image_url": image_url})
+
+            content.append({"type": "text", "text": request.prompt})
+            messages = [{"role": "user", "content": content}]
+
+        return messages

From b5bac800215da1794cfa6d2217dd544a67000ad5 Mon Sep 17 00:00:00 2001
From: Alexandre Marques
Date: Sat, 8 Feb 2025 01:34:38 +0000
Subject: [PATCH 45/45] Refactor generate_benchmark_report to set default values for parameters

---
 src/guidellm/main.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/guidellm/main.py b/src/guidellm/main.py
index 4016ece..4748b12 100644
--- a/src/guidellm/main.py
+++ b/src/guidellm/main.py
@@ -186,17 +186,17 @@ def generate_benchmark_report_cli(
 
 def generate_benchmark_report(
     target: str,
-    backend: BackendEnginePublic,
-    model: Optional[str],
     data: Optional[str],
     data_type: Literal["emulated", "file", "transformers"],
-    tokenizer: Optional[str],
-    rate_type: ProfileGenerationMode,
-    rate: Optional[float],
-    max_seconds: Optional[int],
-    max_requests: Union[Literal["dataset"], int, None],
-    output_path: str,
-    cont_refresh_table: bool,
+    backend: BackendEnginePublic="openai_server",
+    model: Optional[str]=None,
+    tokenizer: Optional[str]=None,
+    rate_type: ProfileGenerationMode="sweep",
+    rate: Optional[float]=None,
+    max_seconds: Optional[int]=120,
+    max_requests: Union[Literal["dataset"], int, None]=None,
+    output_path: str=None,
+    cont_refresh_table: bool=False,
+) -> GuidanceReport:
     """
     Generate a benchmark report for a specified backend and dataset.
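With the defaults introduced in PATCH 45, generate_benchmark_report only needs a target and a data source; backend, model, tokenizer, rate settings, and output options all fall back to defaults, and PATCH 43 makes "aiohttp_server" a valid value for the backend argument. Below is a minimal sketch of a programmatic call, assuming a local OpenAI-compatible server; the URL, model name, and emulated-data string are illustrative placeholders, not values taken from the patches.

    from guidellm.main import generate_benchmark_report

    # Benchmark a locally hosted model through the new aiohttp backend.
    # target, model, and data are placeholders; adjust them to the server
    # under test and to the emulated-data format expected by EmulatedConfig.
    report = generate_benchmark_report(
        target="http://localhost:8000/v1",
        data="prompt_tokens=256,generated_tokens=128",
        data_type="emulated",
        backend="aiohttp_server",
        model="my-model",
        max_seconds=60,
        output_path="benchmark-report.json",
    )
    print(report)

Whether the emulated-data string above matches the exact format parsed by EmulatedConfig.create_config should be checked against src/guidellm/request/emulated.py; the point of the sketch is that, after PATCH 45, every other parameter can simply be omitted.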