From e417eba3c0bfca47f3cec348a85ec42eee815248 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Wed, 14 Jun 2023 10:42:19 -0700 Subject: [PATCH 01/35] Basic input hashing --- setup.py | 1 + src/mlagility/analysis/analysis.py | 26 +++++++++-- src/mlagility/analysis/status.py | 70 ++++++++++++++++++++---------- src/mlagility/analysis/util.py | 25 +++++++---- src/mlagility/common/filesystem.py | 21 ++++++--- src/onnxflow/common/build.py | 2 + 6 files changed, 102 insertions(+), 43 deletions(-) diff --git a/setup.py b/setup.py index 41ef40d1..17d8ee81 100644 --- a/setup.py +++ b/setup.py @@ -49,6 +49,7 @@ "typeguard>=2.3.13", "packaging>=20.9", "pandas>=1.5.3", + "fasteners", ], extras_require={ "tensorflow": [ diff --git a/src/mlagility/analysis/analysis.py b/src/mlagility/analysis/analysis.py index f3098472..1eb17491 100644 --- a/src/mlagility/analysis/analysis.py +++ b/src/mlagility/analysis/analysis.py @@ -8,6 +8,7 @@ import functools import dataclasses import traceback +import hashlib from typing import Union, List, Dict from types import FrameType, TracebackType from enum import Enum @@ -247,7 +248,16 @@ def call_benchit( def get_model_hash( model: Union[torch.nn.Module, "tf.keras.Model"], model_type: build.ModelType ): - return build.hash_model(model, model_type, hash_params=True)[:8] + return build.hash_model(model, model_type, hash_params=False)[:8] + + +def get_workload_hash(model_hash, *args, **kwargs): + hashable_content = model_hash + if args: + hashable_content += f"{[[b.shape for b in a] for a in args]}" + if kwargs: + hashable_content += f"{[a.shape for a in kwargs.keys()]}" + return hashlib.sha256(hashable_content.encode()).hexdigest()[:8] def store_model_info( @@ -439,15 +449,23 @@ def forward_spy(*args, **kwargs): parent_hash, ) model_hash = get_model_hash(local_var, model_type) + workload_hash = get_workload_hash(model_hash, args, kwargs) model_info = tracer_args.models_found[model_hash] - model_info.exec_time = model_info.exec_time + end_time - start_time + if workload_hash not in model_info.workloads: + model_info.workloads[workload_hash] = util.WorkloadInfo( + hash=workload_hash + ) + model_exec_info = model_info.workloads[workload_hash] + model_exec_info.exec_time = ( + model_exec_info.exec_time + end_time - start_time + ) - model_info.executed = model_info.executed + 1 + model_exec_info.executed = model_exec_info.executed + 1 # Call groqit if this is the first time the model is being executed # and this model has been selected by the user if ( - model_info.executed == 1 + model_exec_info.executed == 1 and model_info.is_target and (model_info.build_model) ): diff --git a/src/mlagility/analysis/status.py b/src/mlagility/analysis/status.py index dac41c93..a9c07dd1 100644 --- a/src/mlagility/analysis/status.py +++ b/src/mlagility/analysis/status.py @@ -23,25 +23,48 @@ def recursive_print( models_found: Dict[str, ModelInfo], parent_hash: Union[str, None] = None, script_names_visited: List[str] = False, + depth: int = 0, ) -> None: script_names_visited = [] - for h in models_found.keys(): - if parent_hash == models_found[h].parent_hash and models_found[h].executed > 0: - print_file_name = models_found[h].script_name not in script_names_visited + for model_hash in models_found.keys(): + workloads_executed = False + for workload_hash in models_found[model_hash].workloads.keys(): + workload = models_found[model_hash].workloads[workload_hash] - print_model(models_found[h], h, print_file_name) + if ( + parent_hash == models_found[model_hash].parent_hash + and 
workload.executed > 0 + ): - if print_file_name: - script_names_visited.append(models_found[h].script_name) + workloads_executed = True + print_file_name = False + if models_found[model_hash].script_name not in script_names_visited: + script_names_visited.append(models_found[model_hash].script_name) + if depth == 0: + print_file_name = True + print_workload( + models_found[model_hash], model_hash, workload_hash, print_file_name + ) + + if print_file_name: + script_names_visited.append(models_found[model_hash].script_name) + + if workloads_executed: recursive_print( - models_found, parent_hash=h, script_names_visited=script_names_visited + models_found, + parent_hash=model_hash, + script_names_visited=script_names_visited, + depth=depth + 1, ) -def print_model( - model_info: ModelInfo, model_hash: Union[str, None], print_file_name: bool = False +def print_workload( + model_info: ModelInfo, + model_hash: Union[str, None], + workload_hash: Union[str, None], + print_file_name: bool = False, ) -> None: """ Print information about a given model or submodel @@ -54,12 +77,13 @@ def print_model( # Show the number of times the model has been executed # Only show the execution time if we are not running benchit() as this # impacts time measurement. - if model_info.exec_time == 0 or model_info.build_model: + workload = model_info.workloads[workload_hash] + if workload.exec_time == 0 or model_info.build_model: exec_time = "" else: - exec_time = f" - {model_info.exec_time:.2f}s" + exec_time = f" - {workload.exec_time:.2f}s" printing.logn( - f"(executed {model_info.executed}x{exec_time})", + f"(executed {workload.executed}x{exec_time})", c=printing.Colors.OKGREEN, ) @@ -81,31 +105,29 @@ def print_model( print(f"{ident}\tHash:\t\t" + model_hash) # Print benchit results if benchit was run - if model_info.performance: + if workload.performance: printing.log(f"{ident}\tStatus:\t\t") printing.logn( - f"Successfully benchmarked on {model_info.performance.device} ({model_info.performance.runtime} v{model_info.performance.runtime_version})", - c=model_info.status_message_color, + f"Successfully benchmarked on {workload.performance.device} ({workload.performance.runtime} v{workload.performance.runtime_version})", + c=workload.status_message_color, ) printing.logn( - f"{ident}\t\t\tMean Latency:\t{model_info.performance.mean_latency:.3f}" - f"\t{model_info.performance.latency_units}" + f"{ident}\t\t\tMean Latency:\t{workload.performance.mean_latency:.3f}" + f"\t{workload.performance.latency_units}" ) printing.logn( - f"{ident}\t\t\tThroughput:\t{model_info.performance.throughput:.1f}" - f"\t{model_info.performance.throughput_units}" + f"{ident}\t\t\tThroughput:\t{workload.performance.throughput:.1f}" + f"\t{workload.performance.throughput_units}" ) print() else: if model_info.is_target and model_info.build_model: printing.log(f"{ident}\tStatus:\t\t") - printing.logn( - f"{model_info.status_message}", c=model_info.status_message_color - ) + printing.logn(f"{workload.status_message}", c=workload.status_message_color) - if model_info.traceback is not None: + if workload.traceback is not None: if os.environ.get("MLAGILITY_TRACEBACK") != "False": - for line in model_info.traceback: + for line in workload.traceback: for subline in line.split("\n")[:-1]: print(f"{ident}\t{subline}") diff --git a/src/mlagility/analysis/util.py b/src/mlagility/analysis/util.py index 8a1abc2f..839d5071 100644 --- a/src/mlagility/analysis/util.py +++ b/src/mlagility/analysis/util.py @@ -2,6 +2,7 @@ from dataclasses import dataclass from 
typing import Callable, List, Union, Dict import inspect +import dataclasses import torch import onnx from onnxflow.common import printing @@ -15,6 +16,19 @@ class AnalysisException(Exception): """ +@dataclass +class WorkloadInfo: + hash: Union[str, None] = None + performance: MeasuredPerformance = None + traceback: List[str] = None + inputs: Union[dict, None] = None + executed: int = 0 + exec_time: float = 0.0 + status_message: str = "" + status_message_color: printing.Colors = printing.Colors.ENDC + traceback_message_color: printing.Colors = printing.Colors.FAIL + + @dataclass class ModelInfo: model: torch.nn.Module @@ -26,18 +40,13 @@ class ModelInfo: depth: int = 0 hash: Union[str, None] = None parent_hash: Union[str, None] = None - inputs: Union[dict, None] = None - executed: int = 0 - exec_time: float = 0.0 old_forward: Union[Callable, None] = None - status_message: str = "" - status_message_color: printing.Colors = printing.Colors.ENDC - traceback_message_color: printing.Colors = printing.Colors.FAIL + workloads: Union[Dict[str, WorkloadInfo], None] = dataclasses.field( + default_factory=dict + ) is_target: bool = False build_model: bool = False model_type: build.ModelType = build.ModelType.PYTORCH - performance: MeasuredPerformance = None - traceback: List[str] = None def __post_init__(self): self.params = count_parameters(self.model, self.model_type) diff --git a/src/mlagility/common/filesystem.py b/src/mlagility/common/filesystem.py index 381ba0e9..63c21e07 100644 --- a/src/mlagility/common/filesystem.py +++ b/src/mlagility/common/filesystem.py @@ -6,6 +6,7 @@ from typing import Dict, List import importlib.util import yaml +from fasteners import InterProcessLock import onnxflow.common.printing as printing import onnxflow.common.cache as cache import onnxflow.common.build as build @@ -39,16 +40,22 @@ class CacheError(exc.Error): def _load_yaml(file) -> Dict: - if os.path.isfile(file): - with open(file, "r", encoding="utf8") as stream: - return yaml.load(stream, Loader=yaml.FullLoader) - else: - return {} + with _get_file_lock(file): + if os.path.isfile(file): + with open(file, "r", encoding="utf8") as stream: + return yaml.load(stream, Loader=yaml.FullLoader) + else: + return {} def _save_yaml(dict: Dict, file): - with open(file, "w", encoding="utf8") as outfile: - yaml.dump(dict, outfile) + with _get_file_lock(file): + with open(file, "w", encoding="utf8") as outfile: + yaml.dump(dict, outfile) + + +def _get_file_lock(file): + return InterProcessLock(file.replace(".yaml", ".lock")) def print_yaml_file(file_path, description): diff --git a/src/onnxflow/common/build.py b/src/onnxflow/common/build.py index 7a45a57f..ad65615b 100644 --- a/src/onnxflow/common/build.py +++ b/src/onnxflow/common/build.py @@ -74,6 +74,8 @@ def state_file(cache_dir, build_name): path = os.path.join(output_dir(cache_dir, build_name), state_file_name) return path +def hash_inputs(inputs: dict): + return hashlib.sha256(inputs).hexdigest() def hash_model(model, model_type: ModelType, hash_params: bool = True): # If the model is a path to a file, hash the file From 5b9b1881fe121942338abc8a1a21f9b2b082487f Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Wed, 14 Jun 2023 13:44:21 -0700 Subject: [PATCH 02/35] Showing workload status correctly --- src/mlagility/analysis/analysis.py | 83 +++++++++++++++++------------- 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/src/mlagility/analysis/analysis.py b/src/mlagility/analysis/analysis.py index 1eb17491..6bf64852 100644 --- 
a/src/mlagility/analysis/analysis.py +++ b/src/mlagility/analysis/analysis.py @@ -9,7 +9,7 @@ import dataclasses import traceback import hashlib -from typing import Union, List, Dict +from typing import Union, List, Dict, Tuple from types import FrameType, TracebackType from enum import Enum import torch @@ -64,28 +64,31 @@ def torch_activations(self) -> List[str]: return act -def _store_traceback(model_info: util.ModelInfo): +def _store_traceback(workload_info: util.WorkloadInfo): """ - Store the traceback from an exception into model_info so that + Store the traceback from an exception into workload_info so that we can print it during the status update. """ exc_type, exc_value, exc_traceback = sys.exc_info() - model_info.traceback = traceback.format_exception( + workload_info.traceback = traceback.format_exception( exc_type, exc_value, exc_traceback ) def call_benchit( - model_inputs: dict, model_info: util.ModelInfo, tracer_args: TracerArgs + model_inputs: dict, + model_info: util.ModelInfo, + workload_info: util.WorkloadInfo, + tracer_args: TracerArgs, ) -> None: """ Calls the benchit function from within the model forward function """ # Update status to "computing" - model_info.status_message = "Computing..." - model_info.status_message_color = printing.Colors.OKBLUE + workload_info.status_message = "Computing..." + workload_info.status_message_color = printing.Colors.OKBLUE status.update(tracer_args.models_found) # Get a copy of the keyword arguments @@ -112,10 +115,10 @@ def call_benchit( inputs[all_args[i]] = torch.tensor(args[i].detach().numpy()) else: inputs[all_args[i]] = args[i] - model_info.inputs = inputs + workload_info.inputs = inputs build_name = filesystem.get_build_name( - tracer_args.script_name, tracer_args.labels, model_info.hash + tracer_args.script_name, tracer_args.labels, workload_info.hash ) # Save model labels @@ -125,12 +128,12 @@ def call_benchit( perf = None try: if model_info.model_type == build.ModelType.PYTORCH_COMPILED: - model_info.status_message = ( + workload_info.status_message = ( "Skipping model compiled using torch.compile(). " "benchit requires models to be in eager mode " "(regardless of what runtime you have selected)." ) - model_info.status_message_color = printing.Colors.WARNING + workload_info.status_message_color = printing.Colors.WARNING else: perf = benchmark_model( model_info.model, @@ -151,36 +154,36 @@ def call_benchit( onnx_opset=tracer_args.onnx_opset, ) if Action.BENCHMARK in tracer_args.actions: - model_info.status_message = "Model successfully benchmarked!" - model_info.performance = perf - model_info.status_message_color = printing.Colors.OKGREEN + workload_info.status_message = "Model successfully benchmarked!" + workload_info.performance = perf + workload_info.status_message_color = printing.Colors.OKGREEN else: - model_info.status_message = "Model successfully built!" - model_info.status_message_color = printing.Colors.OKGREEN + workload_info.status_message = "Model successfully built!" + workload_info.status_message_color = printing.Colors.OKGREEN except exp.StageError: build_state = build.load_state( cache_dir=tracer_args.cache_dir, build_name=build_name ) - model_info.status_message = "Build Error: see log files for details." - model_info.status_message_color = printing.Colors.WARNING + workload_info.status_message = "Build Error: see log files for details." 
+ workload_info.status_message_color = printing.Colors.WARNING - _store_traceback(model_info) + _store_traceback(workload_info) except exp.Error: - model_info.status_message = "GroqFlowError: see log files for details." - model_info.status_message_color = printing.Colors.WARNING + workload_info.status_message = "GroqFlowError: see log files for details." + workload_info.status_message_color = printing.Colors.WARNING - _store_traceback(model_info) + _store_traceback(workload_info) # This broad exception is ok since enumerating all exceptions is # not possible, as the tested software continuously evolves. except Exception as e: # pylint: disable=broad-except util.stop_stdout_forward() - model_info.status_message = f"Unknown benchit error: {e}" - model_info.status_message_color = printing.Colors.WARNING + workload_info.status_message = f"Unknown benchit error: {e}" + workload_info.status_message_color = printing.Colors.WARNING - _store_traceback(model_info) + _store_traceback(workload_info) finally: # Ensure that stdout is not being forwarded before updating status if hasattr(sys.stdout, "terminal"): @@ -251,12 +254,19 @@ def get_model_hash( return build.hash_model(model, model_type, hash_params=False)[:8] -def get_workload_hash(model_hash, *args, **kwargs): - hashable_content = model_hash - if args: - hashable_content += f"{[[b.shape for b in a] for a in args]}" - if kwargs: - hashable_content += f"{[a.shape for a in kwargs.keys()]}" +def get_workload_hash(model_hash: str, args: Tuple, kwargs: Dict) -> str: + """ + Combines the model hash and the input shapes to create the workload hash + """ + + # Merge positional and keyword args + args = {"positional{}".format(i + 1): arg for i, arg in enumerate(args)} + kwargs = {**kwargs, **args} + + # Get input shapes and types + input_shapes, input_dtypes = build.get_shapes_and_dtypes(kwargs) + + hashable_content = f"{model_hash}{input_shapes}{input_dtypes}" return hashlib.sha256(hashable_content.encode()).hexdigest()[:8] @@ -455,23 +465,22 @@ def forward_spy(*args, **kwargs): model_info.workloads[workload_hash] = util.WorkloadInfo( hash=workload_hash ) - model_exec_info = model_info.workloads[workload_hash] - model_exec_info.exec_time = ( - model_exec_info.exec_time + end_time - start_time - ) + workload_info = model_info.workloads[workload_hash] + workload_info.exec_time = workload_info.exec_time + end_time - start_time - model_exec_info.executed = model_exec_info.executed + 1 + workload_info.executed = workload_info.executed + 1 # Call groqit if this is the first time the model is being executed # and this model has been selected by the user if ( - model_exec_info.executed == 1 + workload_info.executed == 1 and model_info.is_target and (model_info.build_model) ): call_benchit( model_inputs=[args, kwargs], model_info=model_info, + workload_info=workload_info, tracer_args=tracer_args, ) # Ensure that groqit() doesn't interfere with our execution count From 7997a9bcc6c5925617cb1456e5d7db7c33055f21 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Wed, 14 Jun 2023 18:47:44 -0700 Subject: [PATCH 03/35] Showing workload hash rather than model hash --- src/mlagility/analysis/status.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/mlagility/analysis/status.py b/src/mlagility/analysis/status.py index a9c07dd1..1108c895 100644 --- a/src/mlagility/analysis/status.py +++ b/src/mlagility/analysis/status.py @@ -44,9 +44,7 @@ def recursive_print( if depth == 0: print_file_name = True - print_workload( - 
models_found[model_hash], model_hash, workload_hash, print_file_name - ) + print_workload(models_found[model_hash], workload_hash, print_file_name) if print_file_name: script_names_visited.append(models_found[model_hash].script_name) @@ -62,7 +60,6 @@ def recursive_print( def print_workload( model_info: ModelInfo, - model_hash: Union[str, None], workload_hash: Union[str, None], print_file_name: bool = False, ) -> None: @@ -102,7 +99,7 @@ def print_workload( model_size = model_info.params * 2 / (1024 * 1024) model_size = "{:.1f}".format(model_size) if model_size > 0.1 else "<0.1" print(f"{ident}\tParameters:\t{'{:,}'.format(model_info.params)} ({model_size} MB)") - print(f"{ident}\tHash:\t\t" + model_hash) + print(f"{ident}\tHash:\t\t" + workload_hash) # Print benchit results if benchit was run if workload.performance: From 056b90be95d29b2ee7bd8bcb8c58fde3ca7276af Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Thu, 15 Jun 2023 11:43:03 -0700 Subject: [PATCH 04/35] Temporarily modifying docker file to enalbe CI --- src/mlagility/analysis/analysis.py | 7 ++++--- src/mlagility/analysis/status.py | 2 +- src/mlagility/analysis/util.py | 2 +- src/mlagility/api/Dockerfile | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/mlagility/analysis/analysis.py b/src/mlagility/analysis/analysis.py index 6bf64852..58284887 100644 --- a/src/mlagility/analysis/analysis.py +++ b/src/mlagility/analysis/analysis.py @@ -312,7 +312,6 @@ def store_model_info( depth=depth, hash=model_hash, parent_hash=parent_hash, - is_target=model_hash in tracer_args.targets or tracer_args.targets == [], build_model=build_model, model_type=model_type, script_name=tracer_args.script_name, @@ -463,7 +462,9 @@ def forward_spy(*args, **kwargs): model_info = tracer_args.models_found[model_hash] if workload_hash not in model_info.workloads: model_info.workloads[workload_hash] = util.WorkloadInfo( - hash=workload_hash + hash=workload_hash, + is_target=workload_hash in tracer_args.targets + or tracer_args.targets == [], ) workload_info = model_info.workloads[workload_hash] workload_info.exec_time = workload_info.exec_time + end_time - start_time @@ -474,7 +475,7 @@ def forward_spy(*args, **kwargs): # and this model has been selected by the user if ( workload_info.executed == 1 - and model_info.is_target + and workload_info.is_target and (model_info.build_model) ): call_benchit( diff --git a/src/mlagility/analysis/status.py b/src/mlagility/analysis/status.py index 1108c895..c776fd0d 100644 --- a/src/mlagility/analysis/status.py +++ b/src/mlagility/analysis/status.py @@ -118,7 +118,7 @@ def print_workload( ) print() else: - if model_info.is_target and model_info.build_model: + if workload.is_target and model_info.build_model: printing.log(f"{ident}\tStatus:\t\t") printing.logn(f"{workload.status_message}", c=workload.status_message_color) diff --git a/src/mlagility/analysis/util.py b/src/mlagility/analysis/util.py index 839d5071..c3251a0a 100644 --- a/src/mlagility/analysis/util.py +++ b/src/mlagility/analysis/util.py @@ -25,6 +25,7 @@ class WorkloadInfo: executed: int = 0 exec_time: float = 0.0 status_message: str = "" + is_target: bool = False status_message_color: printing.Colors = printing.Colors.ENDC traceback_message_color: printing.Colors = printing.Colors.FAIL @@ -44,7 +45,6 @@ class ModelInfo: workloads: Union[Dict[str, WorkloadInfo], None] = dataclasses.field( default_factory=dict ) - is_target: bool = False build_model: bool = False model_type: build.ModelType = build.ModelType.PYTORCH diff --git 
a/src/mlagility/api/Dockerfile b/src/mlagility/api/Dockerfile index 4a782d2a..a3922cc7 100644 --- a/src/mlagility/api/Dockerfile +++ b/src/mlagility/api/Dockerfile @@ -3,4 +3,4 @@ from httpd RUN apt-get update && apt-get install -y --no-install-recommends python3-dev python3-setuptools python3-wheel python3-pip ENV PYTHONPATH "${PYTHONPATH}:/usr/bin/python3" -RUN pip install onnxruntime==1.14.1 +RUN pip install onnxruntime --break-system-packages From ca647394421ccf6391ab4e1290da4e2d2dbee0ba Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Thu, 15 Jun 2023 11:48:29 -0700 Subject: [PATCH 05/35] Merge main into branch --- src/onnxflow/common/build.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/onnxflow/common/build.py b/src/onnxflow/common/build.py index ad65615b..7a45a57f 100644 --- a/src/onnxflow/common/build.py +++ b/src/onnxflow/common/build.py @@ -74,8 +74,6 @@ def state_file(cache_dir, build_name): path = os.path.join(output_dir(cache_dir, build_name), state_file_name) return path -def hash_inputs(inputs: dict): - return hashlib.sha256(inputs).hexdigest() def hash_model(model, model_type: ModelType, hash_params: bool = True): # If the model is a path to a file, hash the file From 7d29c5ab1a6c7ae633511b38e8f6e7408bcc4a7c Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Thu, 15 Jun 2023 11:50:29 -0700 Subject: [PATCH 06/35] Revert fs changes --- src/mlagility/common/filesystem.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/mlagility/common/filesystem.py b/src/mlagility/common/filesystem.py index 34cb37f0..e285f8a7 100644 --- a/src/mlagility/common/filesystem.py +++ b/src/mlagility/common/filesystem.py @@ -40,22 +40,16 @@ class CacheError(exc.Error): def _load_yaml(file) -> Dict: - with _get_file_lock(file): - if os.path.isfile(file): - with open(file, "r", encoding="utf8") as stream: - return yaml.load(stream, Loader=yaml.FullLoader) - else: - return {} + if os.path.isfile(file): + with open(file, "r", encoding="utf8") as stream: + return yaml.load(stream, Loader=yaml.FullLoader) + else: + return {} def _save_yaml(dict: Dict, file): - with _get_file_lock(file): - with open(file, "w", encoding="utf8") as outfile: - yaml.dump(dict, outfile) - - -def _get_file_lock(file): - return InterProcessLock(file.replace(".yaml", ".lock")) + with open(file, "w", encoding="utf8") as outfile: + yaml.dump(dict, outfile) def print_yaml_file(file_path, description): From 3c6cb9e61ba0b89406aaba074ca546256e986c3e Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Thu, 15 Jun 2023 12:18:23 -0700 Subject: [PATCH 07/35] Robust shape extraction --- src/onnxflow/common/build.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/onnxflow/common/build.py b/src/onnxflow/common/build.py index 7a45a57f..5ccf6f68 100644 --- a/src/onnxflow/common/build.py +++ b/src/onnxflow/common/build.py @@ -168,7 +168,10 @@ def get_shapes_and_dtypes(inputs: dict): subkey = f"{key}[{i}]" shapes[subkey] = np.array(v).shape dtypes[subkey] = np.array(v).dtype.name - elif torch.is_tensor(value) or tf_helpers.is_keras_tensor(value): + elif torch.is_tensor(value): + shapes[key] = np.array(value.detach()).shape + dtypes[key] = np.array(value.detach()).dtype.name + elif tf_helpers.is_keras_tensor(value): shapes[key] = np.array(value).shape dtypes[key] = np.array(value).dtype.name elif isinstance(value, np.ndarray): From d3fece502306c03f3e13fe522c054760ee065680 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Thu, 15 Jun 
2023 12:46:10 -0700 Subject: [PATCH 08/35] Update ci test hash --- test/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cli.py b/test/cli.py index d41e856e..97061152 100644 --- a/test/cli.py +++ b/test/cli.py @@ -420,7 +420,7 @@ def test_004_cli_report(self): linear_summary["model_class"] == "TwoLayerModel" ), f"Wrong class found {linear_summary['model_class']}" assert ( - linear_summary["hash"] == "54dedbb1" + linear_summary["hash"] == "80b93950" ), f"Wrong hash found {linear_summary['hash']}" assert ( float(linear_summary["x86_latency"]) > 0 From 9da81c906448c37ad0a6aa796c44d436a2c5ae58 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Thu, 15 Jun 2023 13:02:23 -0700 Subject: [PATCH 09/35] Update analysis CI --- test/analysis.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/analysis.py b/test/analysis.py index 447c16c1..0490214a 100644 --- a/test/analysis.py +++ b/test/analysis.py @@ -220,7 +220,7 @@ def test_04_build(self): output = run_analysis( [ "benchit", - "linear_pytorch.py::bf68fb06", + "linear_pytorch.py::87cea470", "--max-depth", "1", "--build-only", @@ -231,7 +231,7 @@ def test_04_build(self): assert np.array_equal(output, (2, 0, 1)) def test_05_cache(self): - model_hash = "bf68fb06" + model_hash = "87cea470" run_analysis( [ "benchit", @@ -321,7 +321,7 @@ def test_12_benchit_hashes(self): output = run_analysis( [ "benchit", - "linear_pytorch.py::bf68fb06", + "linear_pytorch.py::87cea470", "--build-only", "--max-depth", "1", @@ -332,7 +332,7 @@ def test_12_benchit_hashes(self): assert np.array_equal(output, (2, 0, 1)) def test_13_clean_cache(self): - model_hash = "bf68fb06" + model_hash = "87cea470" run_analysis( [ "benchit", From 8aa05f73ece404523b5800fd0972417d05c3c715 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Thu, 15 Jun 2023 14:38:06 -0700 Subject: [PATCH 10/35] Add test --- test/analysis.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/test/analysis.py b/test/analysis.py index 0490214a..da9d2cbb 100644 --- a/test/analysis.py +++ b/test/analysis.py @@ -118,6 +118,26 @@ def __init__(self, **kwargs): print(parsed_args) +""", + "two_executions": """ +import torch +import timm +from mlagility.parser import parse + +# Parsing command-line arguments +batch_size = parse(["batch_size"]) + +# Creating model and set it to evaluation mode +model = timm.create_model("mobilenetv2_035", pretrained=False) +model.eval() + +# Creating inputs +inputs1 = torch.rand((1, 3, 28, 28)) +inputs2 = torch.rand((1, 3, 224, 224)) + +# Calling model +model(inputs1) +model(inputs2) """, } minimal_tokenizer = """ @@ -358,6 +378,16 @@ def test_13_clean_cache(self): assert cache_is_lean(cache_dir, build_name) + def test_14_same_model_different_input_shapes(self): + output = run_analysis( + [ + "benchit", + "two_executions.py", + "--analyze-only", + ] + ) + assert np.array_equal(output, (2, 0, 0)) + if __name__ == "__main__": unittest.main() From 52c5e16863e3233c03ca2f270d915b1b20982af1 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Thu, 15 Jun 2023 14:38:46 -0700 Subject: [PATCH 11/35] Updated dockerfile --- src/mlagility/api/Dockerfile | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/mlagility/api/Dockerfile b/src/mlagility/api/Dockerfile index a3922cc7..3a020c04 100644 --- a/src/mlagility/api/Dockerfile +++ b/src/mlagility/api/Dockerfile @@ -1,6 +1,28 @@ # Getting image from docker hub from httpd -RUN apt-get update 
&& apt-get install -y --no-install-recommends python3-dev python3-setuptools python3-wheel python3-pip -ENV PYTHONPATH "${PYTHONPATH}:/usr/bin/python3" -RUN pip install onnxruntime --break-system-packages +# Install prerequisites +RUN apt-get update && apt-get install -y \ + build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev \ + libssl-dev libreadline-dev libffi-dev wget + +# Download and Install Python 3.8.13 +WORKDIR /tmp +RUN wget https://www.python.org/ftp/python/3.8.13/Python-3.8.13.tgz && \ + tar -xf Python-3.8.13.tgz && \ + cd Python-3.8.13 && \ + ./configure --enable-optimizations && \ + make altinstall + +# Update Python3 and Pip3 to use 3.8.13 version +RUN update-alternatives --install /usr/bin/python3 python3 /usr/local/bin/python3.8 1 && \ + update-alternatives --install /usr/bin/pip3 pip3 /usr/local/bin/pip3.8 1 + +# Set Python 3.8 as the default Python version +RUN update-alternatives --set python3 /usr/local/bin/python3.8 + +# Cleanup +RUN rm -rf /tmp/Python-3.8.13 && \ + rm /tmp/Python-3.8.13.tgz + +RUN python3 -m pip install onnxruntime==1.14.1 \ No newline at end of file From 6aaa2f076360b79dc5e9f66c6c381e44b02c89a0 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Thu, 15 Jun 2023 17:07:09 -0700 Subject: [PATCH 12/35] Add requirement --- .github/workflows/test_mlagility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_mlagility.yml b/.github/workflows/test_mlagility.yml index 3cce76f3..e3b84794 100644 --- a/.github/workflows/test_mlagility.yml +++ b/.github/workflows/test_mlagility.yml @@ -34,7 +34,7 @@ jobs: python -m pip install --upgrade pip conda install pylint if [ -f setup.py ]; then pip install -e .; fi - pip install transformers + pip install transformers timm python -m pip check - name: Lint with PyLint shell: bash -el {0} From e02df17f585fd786edaa6e9d79df41f541d8e53d Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Thu, 15 Jun 2023 17:40:33 -0700 Subject: [PATCH 13/35] Added input shape to print --- src/mlagility/analysis/analysis.py | 7 ++++--- src/mlagility/analysis/status.py | 6 +++--- src/mlagility/analysis/util.py | 1 + 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/mlagility/analysis/analysis.py b/src/mlagility/analysis/analysis.py index 58284887..50e235b0 100644 --- a/src/mlagility/analysis/analysis.py +++ b/src/mlagility/analysis/analysis.py @@ -260,14 +260,14 @@ def get_workload_hash(model_hash: str, args: Tuple, kwargs: Dict) -> str: """ # Merge positional and keyword args - args = {"positional{}".format(i + 1): arg for i, arg in enumerate(args)} + args = {"Positional Arg {}".format(i + 1): arg for i, arg in enumerate(args)} kwargs = {**kwargs, **args} # Get input shapes and types input_shapes, input_dtypes = build.get_shapes_and_dtypes(kwargs) hashable_content = f"{model_hash}{input_shapes}{input_dtypes}" - return hashlib.sha256(hashable_content.encode()).hexdigest()[:8] + return hashlib.sha256(hashable_content.encode()).hexdigest()[:8], input_shapes def store_model_info( @@ -458,13 +458,14 @@ def forward_spy(*args, **kwargs): parent_hash, ) model_hash = get_model_hash(local_var, model_type) - workload_hash = get_workload_hash(model_hash, args, kwargs) + workload_hash, input_shapes = get_workload_hash(model_hash, args, kwargs) model_info = tracer_args.models_found[model_hash] if workload_hash not in model_info.workloads: model_info.workloads[workload_hash] = util.WorkloadInfo( hash=workload_hash, is_target=workload_hash in tracer_args.targets or 
tracer_args.targets == [], + input_shapes=input_shapes, ) workload_info = model_info.workloads[workload_hash] workload_info.exec_time = workload_info.exec_time + end_time - start_time diff --git a/src/mlagility/analysis/status.py b/src/mlagility/analysis/status.py index c776fd0d..c10022aa 100644 --- a/src/mlagility/analysis/status.py +++ b/src/mlagility/analysis/status.py @@ -23,7 +23,6 @@ def recursive_print( models_found: Dict[str, ModelInfo], parent_hash: Union[str, None] = None, script_names_visited: List[str] = False, - depth: int = 0, ) -> None: script_names_visited = [] @@ -41,7 +40,7 @@ def recursive_print( print_file_name = False if models_found[model_hash].script_name not in script_names_visited: script_names_visited.append(models_found[model_hash].script_name) - if depth == 0: + if models_found[model_hash].depth == 0: print_file_name = True print_workload(models_found[model_hash], workload_hash, print_file_name) @@ -54,7 +53,6 @@ def recursive_print( models_found, parent_hash=model_hash, script_names_visited=script_names_visited, - depth=depth + 1, ) @@ -99,6 +97,8 @@ def print_workload( model_size = model_info.params * 2 / (1024 * 1024) model_size = "{:.1f}".format(model_size) if model_size > 0.1 else "<0.1" print(f"{ident}\tParameters:\t{'{:,}'.format(model_info.params)} ({model_size} MB)") + if len(model_info.workloads) > 1: + print(f"{ident}\tInput Shape:\t" + str(model_info.workloads[workload_hash].input_shapes)) print(f"{ident}\tHash:\t\t" + workload_hash) # Print benchit results if benchit was run diff --git a/src/mlagility/analysis/util.py b/src/mlagility/analysis/util.py index c3251a0a..b6850f89 100644 --- a/src/mlagility/analysis/util.py +++ b/src/mlagility/analysis/util.py @@ -22,6 +22,7 @@ class WorkloadInfo: performance: MeasuredPerformance = None traceback: List[str] = None inputs: Union[dict, None] = None + input_shapes: Union[dict, None] = None executed: int = 0 exec_time: float = 0.0 status_message: str = "" From 8593f8bf9e3f50580fffbda910254cec605b4982 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Fri, 16 Jun 2023 12:08:14 -0700 Subject: [PATCH 14/35] Simplify llama code --- models/llm_layer/llama_layer_prototype.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/models/llm_layer/llama_layer_prototype.py b/models/llm_layer/llama_layer_prototype.py index a6214792..6fc42795 100644 --- a/models/llm_layer/llama_layer_prototype.py +++ b/models/llm_layer/llama_layer_prototype.py @@ -16,10 +16,7 @@ def call_llama_layer(params="7B", use_cache=False): # the models end up with different mlagility hashes # Remove the if-statement when # https://github.com/groq/mlagility/issues/316 is fixed - if use_cache: - torch.manual_seed(0) - else: - torch.manual_seed(1) + torch.manual_seed(0) # Parsing command-line arguments batch_size, max_seq_length = parse(["batch_size", "max_seq_length"]) From 676036fa9ab9d4a2d73261cbd83c9829f143b282 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Fri, 16 Jun 2023 15:15:01 -0700 Subject: [PATCH 15/35] recursively printing for each model --- src/mlagility/analysis/status.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/mlagility/analysis/status.py b/src/mlagility/analysis/status.py index c10022aa..6eb9fe0f 100644 --- a/src/mlagility/analysis/status.py +++ b/src/mlagility/analysis/status.py @@ -27,7 +27,6 @@ def recursive_print( script_names_visited = [] for model_hash in models_found.keys(): - workloads_executed = False for workload_hash in 
models_found[model_hash].workloads.keys(): workload = models_found[model_hash].workloads[workload_hash] @@ -36,7 +35,6 @@ def recursive_print( and workload.executed > 0 ): - workloads_executed = True print_file_name = False if models_found[model_hash].script_name not in script_names_visited: script_names_visited.append(models_found[model_hash].script_name) @@ -48,12 +46,11 @@ def recursive_print( if print_file_name: script_names_visited.append(models_found[model_hash].script_name) - if workloads_executed: - recursive_print( - models_found, - parent_hash=model_hash, - script_names_visited=script_names_visited, - ) + recursive_print( + models_found, + parent_hash=model_hash, + script_names_visited=script_names_visited, + ) def print_workload( @@ -97,8 +94,12 @@ def print_workload( model_size = model_info.params * 2 / (1024 * 1024) model_size = "{:.1f}".format(model_size) if model_size > 0.1 else "<0.1" print(f"{ident}\tParameters:\t{'{:,}'.format(model_info.params)} ({model_size} MB)") - if len(model_info.workloads) > 1: - print(f"{ident}\tInput Shape:\t" + str(model_info.workloads[workload_hash].input_shapes)) + input_shape = ( + str(model_info.workloads[workload_hash].input_shapes) + .replace("{", "") + .replace("}", "") + ) + print(f"{ident}\tInput Shape:\t{input_shape}") print(f"{ident}\tHash:\t\t" + workload_hash) # Print benchit results if benchit was run From f6fd7f140dde1b1256403f6cca6b2783228ee854 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Fri, 16 Jun 2023 15:55:20 -0700 Subject: [PATCH 16/35] Keeping track of parent workload hash --- src/mlagility/analysis/analysis.py | 23 +++++++++++++++++------ src/mlagility/analysis/status.py | 1 + src/mlagility/analysis/util.py | 1 + 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/mlagility/analysis/analysis.py b/src/mlagility/analysis/analysis.py index 50e235b0..17479cbe 100644 --- a/src/mlagility/analysis/analysis.py +++ b/src/mlagility/analysis/analysis.py @@ -440,11 +440,6 @@ def forward_spy(*args, **kwargs): # do so by setting the max_depth flag. return old_forward(*args, **kwargs) - # Keep track of execution time - start_time = time.time() - outputs = old_forward(*args, **kwargs) - end_time = time.time() - # We can only keep track of keras models once they have been executed if model_type == build.ModelType.KERAS: store_model_info( @@ -460,16 +455,32 @@ def forward_spy(*args, **kwargs): model_hash = get_model_hash(local_var, model_type) workload_hash, input_shapes = get_workload_hash(model_hash, args, kwargs) model_info = tracer_args.models_found[model_hash] + + # Here we get the parent workload hash by getting the hash of the last workload + # added to the parent model. 
+ parent_workload_hash = "-" + if parent_hash: + parent_workload_hash = "PR found, but workloads is empty" + parent_workloads = list(tracer_args.models_found[parent_hash].workloads) + if parent_workloads: + parent_workload_hash = parent_workloads[-1] + if workload_hash not in model_info.workloads: model_info.workloads[workload_hash] = util.WorkloadInfo( hash=workload_hash, is_target=workload_hash in tracer_args.targets or tracer_args.targets == [], input_shapes=input_shapes, + parent_hash=parent_workload_hash, ) + + # Keep track of execution time + start_time = time.time() + outputs = old_forward(*args, **kwargs) + end_time = time.time() + workload_info = model_info.workloads[workload_hash] workload_info.exec_time = workload_info.exec_time + end_time - start_time - workload_info.executed = workload_info.executed + 1 # Call groqit if this is the first time the model is being executed diff --git a/src/mlagility/analysis/status.py b/src/mlagility/analysis/status.py index 6eb9fe0f..2551deb1 100644 --- a/src/mlagility/analysis/status.py +++ b/src/mlagility/analysis/status.py @@ -101,6 +101,7 @@ def print_workload( ) print(f"{ident}\tInput Shape:\t{input_shape}") print(f"{ident}\tHash:\t\t" + workload_hash) + print(f"{ident}\tParent Hash:\t" + model_info.workloads[workload_hash].parent_hash) # Print benchit results if benchit was run if workload.performance: diff --git a/src/mlagility/analysis/util.py b/src/mlagility/analysis/util.py index b6850f89..5c7284b0 100644 --- a/src/mlagility/analysis/util.py +++ b/src/mlagility/analysis/util.py @@ -19,6 +19,7 @@ class AnalysisException(Exception): @dataclass class WorkloadInfo: hash: Union[str, None] = None + parent_hash: Union[str, None] = None performance: MeasuredPerformance = None traceback: List[str] = None inputs: Union[dict, None] = None From e4623737d20ca1a1e48e4a58b2f84825c0a94e9d Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Fri, 16 Jun 2023 16:44:36 -0700 Subject: [PATCH 17/35] Correctly printing when max_depth is set --- src/mlagility/analysis/analysis.py | 2 +- src/mlagility/analysis/status.py | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/mlagility/analysis/analysis.py b/src/mlagility/analysis/analysis.py index 17479cbe..c7086793 100644 --- a/src/mlagility/analysis/analysis.py +++ b/src/mlagility/analysis/analysis.py @@ -458,7 +458,7 @@ def forward_spy(*args, **kwargs): # Here we get the parent workload hash by getting the hash of the last workload # added to the parent model. 
- parent_workload_hash = "-" + parent_workload_hash = None if parent_hash: parent_workload_hash = "PR found, but workloads is empty" parent_workloads = list(tracer_args.models_found[parent_hash].workloads) diff --git a/src/mlagility/analysis/status.py b/src/mlagility/analysis/status.py index 2551deb1..980db32f 100644 --- a/src/mlagility/analysis/status.py +++ b/src/mlagility/analysis/status.py @@ -16,12 +16,13 @@ def update(models_found: Dict[str, ModelInfo]) -> None: "\nModels discovered during profiling:\n", c=printing.Colors.BOLD, ) - recursive_print(models_found, None, []) + recursive_print(models_found, None, None, []) def recursive_print( models_found: Dict[str, ModelInfo], - parent_hash: Union[str, None] = None, + parent_model_hash: Union[str, None] = None, + parent_workload_hash: Union[str, None] = None, script_names_visited: List[str] = False, ) -> None: script_names_visited = [] @@ -31,8 +32,12 @@ def recursive_print( workload = models_found[model_hash].workloads[workload_hash] if ( - parent_hash == models_found[model_hash].parent_hash + parent_model_hash == models_found[model_hash].parent_hash and workload.executed > 0 + and ( + models_found[model_hash].workloads[workload_hash].parent_hash + == parent_workload_hash + ) ): print_file_name = False @@ -48,7 +53,8 @@ def recursive_print( recursive_print( models_found, - parent_hash=model_hash, + parent_model_hash=model_hash, + parent_workload_hash=workload_hash, script_names_visited=script_names_visited, ) @@ -101,7 +107,6 @@ def print_workload( ) print(f"{ident}\tInput Shape:\t{input_shape}") print(f"{ident}\tHash:\t\t" + workload_hash) - print(f"{ident}\tParent Hash:\t" + model_info.workloads[workload_hash].parent_hash) # Print benchit results if benchit was run if workload.performance: From 5f1ba1dd7556c9f321fd2a63ac4044b9c5480209 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Fri, 16 Jun 2023 16:55:48 -0700 Subject: [PATCH 18/35] Ensure that hashes are different if they come from different workloads --- src/mlagility/analysis/analysis.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/mlagility/analysis/analysis.py b/src/mlagility/analysis/analysis.py index c7086793..b27ab56a 100644 --- a/src/mlagility/analysis/analysis.py +++ b/src/mlagility/analysis/analysis.py @@ -254,9 +254,12 @@ def get_model_hash( return build.hash_model(model, model_type, hash_params=False)[:8] -def get_workload_hash(model_hash: str, args: Tuple, kwargs: Dict) -> str: +def get_workload_hash( + model_hash: str, parent_workload_hash: str, args: Tuple, kwargs: Dict +) -> str: """ Combines the model hash and the input shapes to create the workload hash + We also ensure that workloads that come from different workload parents have different hashes """ # Merge positional and keyword args @@ -266,7 +269,7 @@ def get_workload_hash(model_hash: str, args: Tuple, kwargs: Dict) -> str: # Get input shapes and types input_shapes, input_dtypes = build.get_shapes_and_dtypes(kwargs) - hashable_content = f"{model_hash}{input_shapes}{input_dtypes}" + hashable_content = f"{model_hash}{parent_workload_hash}{input_shapes}{input_dtypes}" return hashlib.sha256(hashable_content.encode()).hexdigest()[:8], input_shapes @@ -452,9 +455,6 @@ def forward_spy(*args, **kwargs): depth, parent_hash, ) - model_hash = get_model_hash(local_var, model_type) - workload_hash, input_shapes = get_workload_hash(model_hash, args, kwargs) - model_info = tracer_args.models_found[model_hash] # Here we get the parent workload hash by getting the hash 
of the last workload # added to the parent model. @@ -465,6 +465,12 @@ def forward_spy(*args, **kwargs): if parent_workloads: parent_workload_hash = parent_workloads[-1] + model_hash = get_model_hash(local_var, model_type) + workload_hash, input_shapes = get_workload_hash( + model_hash, parent_workload_hash, args, kwargs + ) + model_info = tracer_args.models_found[model_hash] + if workload_hash not in model_info.workloads: model_info.workloads[workload_hash] = util.WorkloadInfo( hash=workload_hash, From 417c701eee4f207cf6fe2858758208ecf6a06302 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Fri, 16 Jun 2023 17:10:29 -0700 Subject: [PATCH 19/35] Fix CI --- test/analysis.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/analysis.py b/test/analysis.py index da9d2cbb..b9076530 100644 --- a/test/analysis.py +++ b/test/analysis.py @@ -240,7 +240,7 @@ def test_04_build(self): output = run_analysis( [ "benchit", - "linear_pytorch.py::87cea470", + "linear_pytorch.py::76af2f62", "--max-depth", "1", "--build-only", @@ -251,7 +251,7 @@ def test_04_build(self): assert np.array_equal(output, (2, 0, 1)) def test_05_cache(self): - model_hash = "87cea470" + model_hash = "76af2f62" run_analysis( [ "benchit", @@ -341,7 +341,7 @@ def test_12_benchit_hashes(self): output = run_analysis( [ "benchit", - "linear_pytorch.py::87cea470", + "linear_pytorch.py::76af2f62", "--build-only", "--max-depth", "1", @@ -352,7 +352,7 @@ def test_12_benchit_hashes(self): assert np.array_equal(output, (2, 0, 1)) def test_13_clean_cache(self): - model_hash = "87cea470" + model_hash = "76af2f62" run_analysis( [ "benchit", From b2be4bb775f9ef05f32e1ea197b81c9a776f92c2 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Tue, 20 Jun 2023 10:18:26 -0700 Subject: [PATCH 20/35] Correctly keeping track of last workload executed --- src/mlagility/analysis/analysis.py | 11 +++++------ src/mlagility/analysis/util.py | 1 + 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mlagility/analysis/analysis.py b/src/mlagility/analysis/analysis.py index b27ab56a..71e1315f 100644 --- a/src/mlagility/analysis/analysis.py +++ b/src/mlagility/analysis/analysis.py @@ -456,14 +456,12 @@ def forward_spy(*args, **kwargs): parent_hash, ) - # Here we get the parent workload hash by getting the hash of the last workload - # added to the parent model. 
+ # Get parent workload hash parent_workload_hash = None if parent_hash: - parent_workload_hash = "PR found, but workloads is empty" - parent_workloads = list(tracer_args.models_found[parent_hash].workloads) - if parent_workloads: - parent_workload_hash = parent_workloads[-1] + parent_workload_hash = tracer_args.models_found[ + parent_hash + ].last_workload_executed model_hash = get_model_hash(local_var, model_type) workload_hash, input_shapes = get_workload_hash( @@ -479,6 +477,7 @@ def forward_spy(*args, **kwargs): input_shapes=input_shapes, parent_hash=parent_workload_hash, ) + model_info.last_workload_executed = workload_hash # Keep track of execution time start_time = time.time() diff --git a/src/mlagility/analysis/util.py b/src/mlagility/analysis/util.py index 5c7284b0..3ac4903c 100644 --- a/src/mlagility/analysis/util.py +++ b/src/mlagility/analysis/util.py @@ -47,6 +47,7 @@ class ModelInfo: workloads: Union[Dict[str, WorkloadInfo], None] = dataclasses.field( default_factory=dict ) + last_workload_executed: Union[str, None] = None build_model: bool = False model_type: build.ModelType = build.ModelType.PYTORCH From c9729303d76c5f0c0434efdd5536594d84ac580e Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Tue, 20 Jun 2023 15:13:54 -0700 Subject: [PATCH 21/35] Better UI --- src/mlagility/analysis/status.py | 102 +++++++++++++++++++------------ 1 file changed, 63 insertions(+), 39 deletions(-) diff --git a/src/mlagility/analysis/status.py b/src/mlagility/analysis/status.py index 980db32f..657586e0 100644 --- a/src/mlagility/analysis/status.py +++ b/src/mlagility/analysis/status.py @@ -28,28 +28,38 @@ def recursive_print( script_names_visited = [] for model_hash in models_found.keys(): - for workload_hash in models_found[model_hash].workloads.keys(): - workload = models_found[model_hash].workloads[workload_hash] + model_visited = False + model_info = models_found[model_hash] + workload_idx = 0 + for workload_hash in model_info.workloads.keys(): + workload = model_info.workloads[workload_hash] if ( - parent_model_hash == models_found[model_hash].parent_hash + parent_model_hash == model_info.parent_hash and workload.executed > 0 and ( - models_found[model_hash].workloads[workload_hash].parent_hash + model_info.workloads[workload_hash].parent_hash == parent_workload_hash ) ): - print_file_name = False - if models_found[model_hash].script_name not in script_names_visited: - script_names_visited.append(models_found[model_hash].script_name) - if models_found[model_hash].depth == 0: + if model_info.script_name not in script_names_visited: + script_names_visited.append(model_info.script_name) + if model_info.depth == 0: print_file_name = True - print_workload(models_found[model_hash], workload_hash, print_file_name) + print_workload( + model_info, + workload_hash, + print_file_name, + workload_idx=workload_idx, + model_visited=model_visited, + ) + model_visited = True + workload_idx += 1 if print_file_name: - script_names_visited.append(models_found[model_hash].script_name) + script_names_visited.append(model_info.script_name) recursive_print( models_found, @@ -63,48 +73,63 @@ def print_workload( model_info: ModelInfo, workload_hash: Union[str, None], print_file_name: bool = False, + workload_idx: int = 0, + model_visited: bool = False, ) -> None: """ Print information about a given model or submodel """ + workload = model_info.workloads[workload_hash] ident = "\t" * (2 * model_info.depth + 1) if print_file_name: print(f"{model_info.script_name}.py:") - 
printing.log(f"{ident}{model_info.name} ") - # Show the number of times the model has been executed - # Only show the execution time if we are not running benchit() as this - # impacts time measurement. - workload = model_info.workloads[workload_hash] if workload.exec_time == 0 or model_info.build_model: exec_time = "" else: exec_time = f" - {workload.exec_time:.2f}s" - printing.logn( - f"(executed {workload.executed}x{exec_time})", - c=printing.Colors.OKGREEN, - ) - if model_info.model_type == build.ModelType.PYTORCH: - print(f"{ident}\tModel Type:\tPytorch (torch.nn.Module)") - elif model_info.model_type == build.ModelType.KERAS: - print(f"{ident}\tModel Type:\tKeras (tf.keras.Model)") + if model_info.depth == 0: + if not model_visited: + printing.logn(f"{ident}{model_info.name}") + else: + printing.log(f"{ident}{model_info.name}") + printing.logn( + f" (executed {workload.executed}x{exec_time})", + c=printing.Colors.OKGREEN, + ) + + if (model_info.depth == 0 and not model_visited) or (model_info.depth != 0): + if model_info.depth == 0: + if model_info.model_type == build.ModelType.PYTORCH: + print(f"{ident}\tModel Type:\tPytorch (torch.nn.Module)") + elif model_info.model_type == build.ModelType.KERAS: + print(f"{ident}\tModel Type:\tKeras (tf.keras.Model)") + + # Display class of found model and the file where it was found + model_class = type(model_info.model) + print(f"{ident}\tClass:\t\t{model_class.__name__} ({model_class})") + if model_info.depth == 0: + print(f"{ident}\tLocation:\t{model_info.file}, line {model_info.line}") + + # Converting number of parameters to MB assuming 2 bytes per parameter + model_size = model_info.params * 2 / (1024 * 1024) + model_size = "{:.1f}".format(model_size) if model_size > 0.1 else "<0.1" + print( + f"{ident}\tParameters:\t{'{:,}'.format(model_info.params)} ({model_size} MB)" + ) - # Display class of found model and the file where it was found - model_class = type(model_info.model) - print(f"{ident}\tClass:\t\t{model_class.__name__} ({model_class})") if model_info.depth == 0: - print(f"{ident}\tLocation:\t{model_info.file}, line {model_info.line}") - - # Converting number of parameters to MB assuming 2 bytes per parameter - model_size = model_info.params * 2 / (1024 * 1024) - model_size = "{:.1f}".format(model_size) if model_size > 0.1 else "<0.1" - print(f"{ident}\tParameters:\t{'{:,}'.format(model_info.params)} ({model_size} MB)") - input_shape = ( - str(model_info.workloads[workload_hash].input_shapes) - .replace("{", "") - .replace("}", "") - ) + printing.logn( + f"\n{ident}\tWorkload {workload_idx+1} (executed {workload.executed}x{exec_time})", + c=printing.Colors.OKGREEN, + ) + + # Prepare input shape to be printed + input_shape = dict(model_info.workloads[workload_hash].input_shapes) + input_shape = {key: value for key, value in input_shape.items() if value != ()} + input_shape = str(input_shape).replace("{", "").replace("}", "") + print(f"{ident}\tInput Shape:\t{input_shape}") print(f"{ident}\tHash:\t\t" + workload_hash) @@ -143,5 +168,4 @@ def print_workload( ) else: print() - else: - print("") + print() From 6abf7ac42f9a2528ae451cdbd0b469a9ee90828c Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Tue, 20 Jun 2023 16:41:30 -0700 Subject: [PATCH 22/35] Added test --- src/mlagility/analysis/status.py | 4 ++-- src/mlagility/version.py | 2 +- test/analysis.py | 16 +++++++++++++--- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/mlagility/analysis/status.py b/src/mlagility/analysis/status.py index 
657586e0..265d9a10 100644 --- a/src/mlagility/analysis/status.py +++ b/src/mlagility/analysis/status.py @@ -89,7 +89,7 @@ def print_workload( else: exec_time = f" - {workload.exec_time:.2f}s" - if model_info.depth == 0: + if model_info.depth == 0 and len(model_info.workloads) > 1: if not model_visited: printing.logn(f"{ident}{model_info.name}") else: @@ -119,7 +119,7 @@ def print_workload( f"{ident}\tParameters:\t{'{:,}'.format(model_info.params)} ({model_size} MB)" ) - if model_info.depth == 0: + if model_info.depth == 0 and len(model_info.workloads) > 1: printing.logn( f"\n{ident}\tWorkload {workload_idx+1} (executed {workload.executed}x{exec_time})", c=printing.Colors.OKGREEN, diff --git a/src/mlagility/version.py b/src/mlagility/version.py index f7493720..1fe90f6a 100644 --- a/src/mlagility/version.py +++ b/src/mlagility/version.py @@ -1 +1 @@ -__version__ = "3.1.3" +__version__ = "3.1.4" diff --git a/test/analysis.py b/test/analysis.py index b9076530..53171c64 100644 --- a/test/analysis.py +++ b/test/analysis.py @@ -124,9 +124,6 @@ def __init__(self, **kwargs): import timm from mlagility.parser import parse -# Parsing command-line arguments -batch_size = parse(["batch_size"]) - # Creating model and set it to evaluation mode model = timm.create_model("mobilenetv2_035", pretrained=False) model.eval() @@ -138,6 +135,7 @@ def __init__(self, **kwargs): # Calling model model(inputs1) model(inputs2) +model(inputs1) """, } minimal_tokenizer = """ @@ -388,6 +386,18 @@ def test_14_same_model_different_input_shapes(self): ) assert np.array_equal(output, (2, 0, 0)) + def test_15_same_model_different_input_shapes_maxdepth(self): + output = run_analysis( + [ + "benchit", + "two_executions.py", + "--analyze-only", + "--max-depth", + "1", + ] + ) + assert np.array_equal(output, (6, 0, 0)) + if __name__ == "__main__": unittest.main() From 0d34fa7742a8617af9ffb903a85bf62beaefc33b Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Tue, 20 Jun 2023 16:47:06 -0700 Subject: [PATCH 23/35] Renamed function as suggested --- src/mlagility/analysis/analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlagility/analysis/analysis.py b/src/mlagility/analysis/analysis.py index 71e1315f..125520f3 100644 --- a/src/mlagility/analysis/analysis.py +++ b/src/mlagility/analysis/analysis.py @@ -76,7 +76,7 @@ def _store_traceback(workload_info: util.WorkloadInfo): ) -def call_benchit( +def explore_workload( model_inputs: dict, model_info: util.ModelInfo, workload_info: util.WorkloadInfo, @@ -495,7 +495,7 @@ def forward_spy(*args, **kwargs): and workload_info.is_target and (model_info.build_model) ): - call_benchit( + explore_workload( model_inputs=[args, kwargs], model_info=model_info, workload_info=workload_info, From b4b6371758e074c9452e19530185dfa0c45d0c93 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Wed, 21 Jun 2023 10:54:53 -0700 Subject: [PATCH 24/35] Change model to workload where appropriate --- docs/code.md | 8 +- docs/readme.md | 2 +- docs/tools_user_guide.md | 148 +++++++++++----------- examples/cli/build.md | 10 +- examples/cli/cache.md | 4 +- examples/cli/discovery.md | 74 +++++------ examples/cli/extras/example_sequence.py | 2 +- examples/cli/readme.md | 34 ++--- examples/model_api/hello_world.py | 4 +- examples/readme.md | 2 +- models/llm_layer/llama_layer_prototype.py | 4 - src/mlagility/__init__.py | 2 +- src/mlagility/analysis/analysis.py | 10 +- src/mlagility/analysis/status.py | 2 +- src/mlagility/api/model_api.py | 2 +- 
src/mlagility/api/script_api.py | 8 +- test/model_api.py | 10 +- 17 files changed, 164 insertions(+), 162 deletions(-) diff --git a/docs/code.md b/docs/code.md index b18daf26..c7c0daa3 100644 --- a/docs/code.md +++ b/docs/code.md @@ -11,7 +11,7 @@ The MLAgility repository has a few major top-level directories: - `models`: the corpora of models that makes up the MLAgility benchmark (see [the models readme](https://github.com/groq/mlagility/blob/main/models/readme.md)). - Each subdirectory under `models` represents a corpus of models pulled from somewhere on the internet. For example, `models/torch_hub` is a corpus of models from [Torch Hub](https://github.com/pytorch/hub). - `src/mlagility`: source code for the MLAgility benchmarking tools (see [Benchmarking Tools](#benchmarking-tools) for a description of how the code is used). - - `src/mlagility/analysis`: functions for profiling a model script, discovering model instances, and invoking `benchmark_model()` on those instances. + - `src/mlagility/analysis`: functions for profiling a model script, discovering model instances, and invoking `benchmark_workload()` on those instances. - `src/mlagility/api`: implements the benchmarking APIs. - `src/mlagility/cli`: implements the `benchit` CLI. - `src/mlagility/common`: functions common to the other modules. @@ -29,10 +29,10 @@ MLAgility provides two main tools, the `benchit` CLI and benchmarking APIs. Inst 1. The default command for `benchit` CLI runs the `benchmark_script()` API, which is implemented in [api/script_api.py](https://github.com/groq/mlagility/blob/main/src/mlagility/api/script_api.py). - Other CLI commands are also implemented in `cli/`, for example the `report` command is implemented in `cli/report.py`. 1. The `benchmark_script()` API takes in a set of scripts, each of which should invoke at least one model instance, to evaluate and passes each into the `evaluate_script()` function for analysis, which is implemented in [analysis/analysis.py](https://github.com/groq/mlagility/blob/main/src/mlagility/analysis/analysis.py). -1. `evaluate_script()` uses a profiler to discover the model instances in the script, and passes each into the `benchmark_model()` API, which is defined in [api/model_api.py](https://github.com/groq/mlagility/blob/main/src/mlagility/api/model_api.py). -1. The `benchmark_model()` API prepares the model for benchmarking (e.g., exporting and optimizing an ONNX file), which creates an instance of a `*Model` class, where `*` can be CPU, GPU, etc. The `*Model` classes are defined in [api/ortmodel.py](https://github.com/groq/mlagility/blob/main/src/mlagility/api/ortmodel.py), [api/trtmodel.py](https://github.com/groq/mlagility/blob/main/src/mlagility/api/trtmodel.py), etc. +1. `evaluate_script()` uses a profiler to discover the model instances in the script, and passes each into the `benchmark_workload()` API, which is defined in [api/model_api.py](https://github.com/groq/mlagility/blob/main/src/mlagility/api/model_api.py). +1. The `benchmark_workload()` API prepares the model for benchmarking (e.g., exporting and optimizing an ONNX file), which creates an instance of a `*Model` class, where `*` can be CPU, GPU, etc. The `*Model` classes are defined in [api/ortmodel.py](https://github.com/groq/mlagility/blob/main/src/mlagility/api/ortmodel.py), [api/trtmodel.py](https://github.com/groq/mlagility/blob/main/src/mlagility/api/trtmodel.py), etc. 1. 
The `*Model` classes provide a `.benchmark()` method that benchmarks the model on the device and returns an instance of the `MeasuredPerformance` class, which includes the performance statistics acquired during benchmarking. -1. `benchmark_model()` and the `*Model` classes are built using [`onnxflow.build_model()`](#model-build-tool) +1. `benchmark_workload()` and the `*Model` classes are built using [`onnxflow.build_model()`](#model-build-tool) # Model Build Tool diff --git a/docs/readme.md b/docs/readme.md index 42dbaf72..d9f0326a 100644 --- a/docs/readme.md +++ b/docs/readme.md @@ -3,7 +3,7 @@ This directory contains documentation for the MLAgility project: - [code.md](https://github.com/groq/mlagility/blob/main/docs/code.md): Code organization for the benchmark and tools. - [install.md](https://github.com/groq/mlagility/blob/main/docs/install.md): Installation instructions for the MLAgility project. -- [tools_user_guide.md](https://github.com/groq/mlagility/blob/main/docs/tools_user_guide.md): User guide for the MLAgility benchmarking tools: `benchit` CLI, `benchmark_script()`, and `benchmark_model()`. +- [tools_user_guide.md](https://github.com/groq/mlagility/blob/main/docs/tools_user_guide.md): User guide for the MLAgility benchmarking tools: `benchit` CLI, `benchmark_script()`, and `benchmark_workload()`. - [versioning.md](https://github.com/groq/mlagility/blob/main/docs/versioning.md): Defines the semantic versioning rules for the `mlagility` package. There is more useful documentation avialable in: diff --git a/docs/tools_user_guide.md b/docs/tools_user_guide.md index 09916eff..d5a352ac 100644 --- a/docs/tools_user_guide.md +++ b/docs/tools_user_guide.md @@ -51,9 +51,9 @@ Example Output: Where `your_script.py` is a Python script that instantiates and executes a PyTorch model named `YourModel`. The benchmarking results are also saved to a `build directory` in the `MLAgility cache` (see [Build](#build)). The `benchit` CLI performs the following steps: -1. [Analysis](#analysis): profile the Python script to identify the PyTorch models within -2. [Build](#build): call the `benchmark_script()` [API](#the-mlagility-api) to prepare each model for benchmarking -3. [Benchmark](#benchmark): call the `benchmark_model()` [API](#the-mlagility-api) on each model to gather performance statistics +1. [Analysis](#analysis): profile the Python script to identify the PyTorch workloads within +2. [Build](#build): call the `benchmark_script()` [API](#the-mlagility-api) to prepare each workload for benchmarking +3. [Benchmark](#benchmark): call the `benchmark_workload()` [API](#the-mlagility-api) on each workload to gather performance statistics _Note_: The benchmarking methodology is defined [here](#benchmark). If you are looking for more detailed instructions on how to install mlagility, you can find that [here](https://github.com/groq/mlagility/blob/main/docs/install.md). @@ -65,19 +65,19 @@ _Note_: The benchmarking methodology is defined [here](#benchmark). If you are l Most of the functionality provided by the `benchit` CLI is also available in the MLAgility API: - `mlagility.benchmark_script()` provides the same benchmarking functionality as the `benchit` CLI: it takes a script and target device, and returns performance results. -- `mlagility.benchmark_model()` provides a subset of this functionality: it takes a model and its inputs, and returns performance results. 
- - The main difference is that `benchmark_model()` does not include the [Analysis](#analysis) feature, and `benchmark_script()` does. +- `mlagility.benchmark_workload()` provides a subset of this functionality: it takes a model and its inputs, and returns performance results. + - The main difference is that `benchmark_workload()` does not include the [Analysis](#analysis) feature, and `benchmark_script()` does. -Generally speaking, the `benchit` CLI is a command line interface for the `benchmark_script()` API, which internally calls `benchmark_model()`. You can read more about this code organization [here](https://github.com/groq/mlagility/blob/main/docs/code.md). +Generally speaking, the `benchit` CLI is a command line interface for the `benchmark_script()` API, which internally calls `benchmark_workload()`. You can read more about this code organization [here](https://github.com/groq/mlagility/blob/main/docs/code.md). For example, the following script: ```python -from mlagility import benchmark_model +from mlagility import benchmark_workload model = YourModel() results = model(**inputs) -perf = benchmark_model(model, inputs) +perf = benchmark_workload(model, inputs) ``` Will print an output like this: @@ -88,7 +88,7 @@ Will print an output like this: > throughput: 21784.8 ips ``` -`benchmark_model()` returns a `MeasuredPerformance` object that includes members: +`benchmark_workload()` returns a `MeasuredPerformance` object that includes members: - `latency_units`: unit of time used for measuring latency, which is set to `milliseconds (ms)`. - `mean_latency`: average benchmarking latency, measured in `latency_units`. - `throughput_units`: unit used for measuring throughput, which is set to `inferences per second (IPS)`. @@ -106,34 +106,40 @@ A **model** is a PyTorch (torch.nn.Module) instance that has been instantiated i - Examples: BERT-Base, ResNet-50, etc. +## Workload + +A **workload** is a model executed with inputs of a given shape. + +- Example: BERT-Base executed with 'attention_mask' and 'input_ids' of shape (1, 128). + ## Device -A **device** is a piece of hardware capable of running a model. +A **device** is a piece of hardware capable of running a workload. - Examples: Nvidia A100 40GB, Intel Xeon Platinum 8380, Groq GroqChip1 ## Runtime -A **runtime** is a piece of software that executes a model on a device. +A **runtime** is a piece of software that executes a workload on a device. - Different runtimes can produce different performance results on the same device because: - - Runtimes often optimize the model prior to execution. + - Runtimes often optimize the workload prior to execution. - The runtime is responsible for orchestrating data movement, device invocation, etc. - Examples: ONNX Runtime, TensorRT, PyTorch Eager Execution, etc. ## Analysis -**Analysis** is the process by which `benchmark_script()` inspects a Python script and identifies the PyTorch models within. +**Analysis** is the process by which `benchmark_script()` inspects a Python script and identifies the PyTorch workloads within. -`benchmark_script()` performs analysis by running and profiling your script. When a model object (see [Model](#model) is encountered, it is inspected to gather statistics (such as the number of parameters in the model) and/or pass it to the `benchmark_model()` API for benchmarking. +`benchmark_script()` performs analysis by running and profiling your script. 
When a workload (see [Workload](#workload)) is encountered, it is inspected to gather statistics (such as the number of parameters in the model) and/or pass it to the `benchmark_workload()` API for benchmarking. > _Note_: the `benchit` CLI and `benchmark_script()` API both run your entire script. Please ensure that your script is safe to run, especially if you got it from the internet. -> See the [Multiple Models per Script tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md#multiple-models-per-script) for a detailed example of how analysis can discover multiple models from a single script. +> See the [Multiple Workloads per Script tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md#multiple-workloads-per-script) for a detailed example of how analysis can discover multiple workloads from a single script. -## Model Hashes +## Workload Hashes -Each `model` in a `script` is identified by a unique `hash`. The `analysis` phase of `benchmark_script()` will display the `hash` for each model. The `build` phase will save exported models to into the `cache` according to the naming scheme `{script_name}_{hash}`. +Each `workload` in a `script` is identified by a unique `hash`. The `analysis` phase of `benchmark_script()` will display the `hash` for each workload. The `build` phase will save exported workloads into the `cache` according to the naming scheme `{script_name}_{hash}`. For example: @@ -163,40 +169,40 @@ Once a script has been benchmarked, all labels that correspond to that script wi ## Build -**Build** is the process by which the `benchmark_model()` API consumes a [model](#model) and produces ONNX files, Groq executables, and other artifacts needed for benchmarking. +**Build** is the process by which the `benchmark_workload()` API consumes a [model](#model) and a corresponding set of `inputs` and produces ONNX files, Groq executables, and other artifacts needed for benchmarking. We refer to this collection of artifacts as the `build directory` and store each build in the MLAgility `cache` for later use. We leverage ONNX files because of their broad compatibility with model frameworks (PyTorch, Keras, etc.), software (ONNX Runtime, TensorRT, Groq Compiler, etc.), and devices (CPUs, GPUs, GroqChip processors, etc.). You can learn more about ONNX [here](https://onnx.ai/). -The build functionality of `benchmark_model()` includes the following steps: -1. Take a `model` object and a corresponding set of `inputs`*. +The build functionality of `benchmark_workload()` includes the following steps: +1. Take a `model` object and a corresponding set of `inputs`* (which we call a [workload](#workload)). 1. Check the cache for a successful build we can load. If we get a cache hit, the build is done. If no build is found, or the build in the cache is stale**, continue. 1. Pass the `model` and `inputs` to the ONNX exporter corresponding to the `model`'s framework (e.g., PyTorch models use `torch.onnx.export()`). -1. Use [ONNX Runtime](https://github.com/microsoft/onnxruntime) and [ONNX ML tools](https://github.com/onnx/onnxmltools) to optimize the model and convert it to float16, respectively. +1. Use [ONNX Runtime](https://github.com/microsoft/onnxruntime) and [ONNX ML tools](https://github.com/onnx/onnxmltools) to optimize the workload and convert it to float16, respectively. 1. [If the build's device type is `groq`] Pass the optimized float16 ONNX file to Groq Compiler and Assembler to produce a Groq executable. 1.
Save the successful build to the cache for later use. -> *_Note_: Each `build` corresponds to a set of static input shapes. `inputs` are passed into the `benchmark_model()` API to provide those shapes. +> *_Note_: Each `build` corresponds to a set of static input shapes. `inputs` are passed into the `benchmark_workload()` API to provide those shapes. > **_Note_: A cached build can be stale because of any of the following changes since the last build: > * The model changed > * The shape of the inputs changed -> * The arguments to `benchmark_model()` changed +> * The arguments to `benchmark_workload()` changed > * MLAgility was updated to a new, incompatible version ## Benchmark -*Benchmark* is the process by which the `benchmark_model()` API collects performance statistics about a [model](#model). Specifically, `benchmark_model()` takes a [build](#build) of a model and executes it on a target device using target runtime software (see [Devices and Runtimes](#devices-and-runtimes)). +*Benchmark* is the process by which the `benchmark_workload()` API collects performance statistics about a [workload](#workload). Specifically, `benchmark_workload()` takes a [build](#build) of a workload and executes it on a target device using target runtime software (see [Devices and Runtimes](#devices-and-runtimes)). -By default, `benchmark_model()` will run the model 100 times to collect the following statistics: +By default, `benchmark_workload()` will run the workload 100 times to collect the following statistics: 1. Mean Latency, in milliseconds (ms): the average time it takes the runtime/device combination to execute the model/inputs combination once. This includes the time spent invoking the device and transferring the model's inputs and outputs between host memory and the device (when applicable). 1. Throughput, in inferences per second (IPS): the number of times the model/inputs combination can be executed on the runtime/device combination per second. - > - _Note_: `benchmark_model()` is not aware of whether `inputs` is a single input or a batch of inputs. If your `inputs` is actually a batch of inputs, you should multiply `benchmark_model()`'s reported IPS by the batch size. + > - _Note_: `benchmark_workload()` is not aware of whether `inputs` is a single input or a batch of inputs. If your `inputs` is actually a batch of inputs, you should multiply `benchmark_workload()`'s reported IPS by the batch size. # Devices and Runtimes -MLAgility can be used to benchmark a model across a variety of runtimes and devices, as long as the device is available and the device/runtime combination is supported by MLAgility. +MLAgility can be used to benchmark a workload across a variety of runtimes and devices, as long as the device is available and the device/runtime combination is supported by MLAgility. ## Available Devices @@ -208,7 +214,7 @@ If you are using a remote machine, it must: - include the target device - have `miniconda`, `python>=3.8`, and `docker>=20.10` installed -When you call `benchit` CLI or `benchmark_model()`, the following actions are performed on your behalf: +When you call `benchit` CLI or `benchmark_workload()`, the following actions are performed on your behalf: 1. Perform a `build`, which exports all models from the script to ONNX and prepares for benchmarking. - If the device type selected is `groq`, this step also compiles the ONNX file into a Groq executable. 1. [Remote mode only] `ssh` into the remote machine and transfer the `build`. 
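To make the flow above concrete, here is a minimal sketch of driving the same build-and-benchmark flow from Python instead of the CLI; the `SmallExample` module and its input tensor are placeholders, and the keyword arguments simply mirror the `device` and `backend` options documented below:

```python
import torch
from mlagility import benchmark_workload

# Placeholder workload: any torch.nn.Module plus example inputs of a fixed shape.
class SmallExample(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(10, 5)

    def forward(self, x):
        return self.fc(x)

model = SmallExample()
inputs = {"x": torch.rand(1, 10)}

# Build (ONNX export, optimization, float16 conversion) and then benchmark
# the workload on a locally installed x86 CPU.
perf = benchmark_workload(model, inputs, device="x86", backend="local")
print(perf.mean_latency, perf.latency_units)
print(perf.throughput, perf.throughput_units)
```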
@@ -226,7 +232,7 @@ Specify a device type that will be used for benchmarking. Usage: - `benchit benchmark INPUT_FILES --device TYPE` - - Benchmark the model(s) in `INPUT_FILES` on a locally installed device of type `TYPE` (eg, a locally installed Nvidia device). + - Benchmark the workload(s) in `INPUT_FILES` on a locally installed device of type `TYPE` (eg, a locally installed Nvidia device). Valid values of `TYPE` include: - `x86` (default): Intel and AMD x86 CPUs. @@ -239,7 +245,7 @@ Valid values of `TYPE` include: Also available as API arguments: - `benchmark_script(device=...)` -- `benchmark_model(device=...)`. +- `benchmark_workload(device=...)`. > For a detailed example, see the [CLI Nvidia tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md#nvidia-benchmarking). @@ -263,7 +269,7 @@ Valid values: Also available as API arguments: - `benchmark_script(backend=...)` -- `benchmark_model(backend=...)` +- `benchmark_workload(backend=...)` ### Runtimes @@ -284,7 +290,7 @@ Each device type has its own default runtime, as indicated below. This feature is also be available as an API argument: - `benchmark_script(runtimes=[...])` -- `benchmark_model(runtime=...)` +- `benchmark_workload(runtime=...)` > _Note_: `torch-eager` and `torch-compiled` are not available whe using the `remote` backend. @@ -297,7 +303,7 @@ The default usage of `benchit` is to directly provide it with a python script, f _Note_: Some of these tasks have to do with the MLAgility `cache`, which stores the `build directories` (see [Build](#build)). The commands are: -- [`benchmark`](#benchmark-command) (default command): benchmark the model(s) in one or more scripts +- [`benchmark`](#benchmark-command) (default command): benchmark the workload(s) in one or more scripts - [`cache list`](#list-command): list the available builds in the cache - [`cache print`](#print-command): print the [state](https://github.com/groq/groqflow/blob/main/docs/user_guide.md#stateyaml-file) of a build from the cache - [`cache delete`](#delete-command): delete one or more builds from the cache @@ -328,27 +334,27 @@ Examples: - `benchit *.py` - Benchmark all scripts which can be found at the current working directory. - `benchit models/*/*.py` - - Benchmark the entire corpora of MLAgility models. + - Benchmark the entire corpora of MLAgility. - `benchit *.onnx` - Benchmark all ONNX files which can be found at the current working directory. > See the [Benchmark Multiple Scripts tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md#benchmark-multiple-scripts) for a detailed example. -You can also leverage model hashes (see [Model Hashes](#model-hashes)) to filter which models in a script will be acted on, in the following manner: - - `benchit example.py::hash_0` will only benchmark the model corresponding to `hash_0`. - - You can also supply multiple hashes, for example `benchit example.py::hash_0,hash_1` will benchmark the models corresponding to both `hash_0` and `hash_1`. +You can also leverage workload hashes (see [Workload Hashes](#workload-hashes)) to filter which workloads in a script will be acted on, in the following manner: + - `benchit example.py::hash_0` will only benchmark the workload corresponding to `hash_0`. + - You can also supply multiple hashes, for example `benchit example.py::hash_0,hash_1` will benchmark the workloads corresponding to both `hash_0` and `hash_1`. -> _Note_: Using bash regular expressions and filtering model by hashes are mutually exclusive. 
To filter models by hashes, provide the full path of the Python script rather than a regular expression. +> _Note_: Using bash regular expressions and filtering workload by hashes are mutually exclusive. To filter workloads by hashes, provide the full path of the Python script rather than a regular expression. -> See the [Filtering Model Hashes tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md#filtering-model-hashes) for a detailed example. +> See the [Filtering Workload Hashes tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md#filtering-workload-hashes) for a detailed example. -Additionally, you can leverage labels (see [Labels](#labels)) to filter which models in a script will be acted on, in the following manner: +Additionally, you can leverage labels (see [Labels](#labels)) to filter which workloads in a script will be acted on, in the following manner: - `benchit *.py --labels test_group::a` will only benchmark the scripts labels with `test_group::a`. - You can also supply multiple labels, for example `benchit *.py --labels test_group::a domain::nlp` only benchmark scripts that have both `test_group::a`, and `domain::nlp` labels. -> _Note_: Using bash regular expressions and filtering model by hashes are mutually exclusive. To filter models by hashes, provide the full path of the Python script rather than a regular expression. +> _Note_: Using bash regular expressions and filtering workload by hashes are mutually exclusive. To filter workloads by hashes, provide the full path of the Python script rather than a regular expression. -> _Note_: ONNX file input currently supports only models of size less than 2 GB. ONNX files passed directly into `benchit *.onnx` are benchmarked as-is without applying any additional build stages. +> _Note_: ONNX file input currently supports only workloads of size less than 2 GB. ONNX files passed directly into `benchit *.onnx` are benchmarked as-is without applying any additional build stages. ### Use Slurm @@ -371,7 +377,7 @@ Available as an API argument: ### Process Isolation -Evaluate each `benchit` input in its own isolated subprocess. This option allows the main process to continue on to the next input if the current input fails for any reason (e.g., a bug in the input script, the operating system running out of memory, incompatibility between a model and the selected benchmarking runtime, etc.). +Evaluate each `benchit` input in its own isolated subprocess. This option allows the main process to continue on to the next input if the current input fails for any reason (e.g., a bug in the input script, the operating system running out of memory, incompatibility between a workload and the selected benchmarking runtime, etc.). Usage: - `benchit benchmark INPUT_FILES --process-isolation` @@ -391,7 +397,7 @@ Also available as an API argument: Also available as API arguments: - `benchmark_script(cache_dir=...)` -- `benchmark_model(cache_dir=...)` +- `benchmark_workload(cache_dir=...)` > See the [Cache Directory tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md#cache-directory) for a detailed example. 
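As a quick illustration of the `cache_dir` argument just mentioned, here is a hedged sketch with a placeholder one-layer workload and an arbitrary cache path:

```python
import torch
from mlagility import benchmark_workload

# Placeholder workload; the point of interest is the custom cache location.
model = torch.nn.Linear(10, 5)
inputs = {"input": torch.rand(1, 10)}

# Keep this run's build directory under a non-default cache root.
benchmark_workload(model, inputs, cache_dir="/scratch/mlagility_cache")
```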
@@ -401,9 +407,9 @@ Also available as API arguments: Also available as API arguments: - `benchmark_script(lean_cache=True/False, ...)` (default False) -- `benchmark_model(lean_cache=True/False, ...)` (default False) +- `benchmark_workload(lean_cache=True/False, ...)` (default False) -> _Note_: useful for benchmarking many models, since the `build` artifacts from the models can take up a significant amount of hard drive space. +> _Note_: useful for benchmarking many workloads, since the `build` artifacts from the workloads can take up a significant amount of hard drive space. > See the [Lean Cache tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md#lean-cache) for a detailed example. @@ -412,19 +418,19 @@ Also available as API arguments: `--rebuild REBUILD` Sets a cache policy that decides whether to load or rebuild a cached build. Takes one of the following values: - - *Default*: `"if_needed"` will use a cached model if available, build one if it is not available,and rebuild any stale builds. - - Set `"always"` to force `benchit` to always rebuild your model, regardless of whether it is available in the cache or not. - - Set `"never"` to make sure `benchit` never rebuilds your model, even if it is stale. `benchit`will attempt to load any previously built model in the cache, however there is no guarantee it will be functional or correct. + - *Default*: `"if_needed"` will use a cached workload if available, build one if it is not available,and rebuild any stale builds. + - Set `"always"` to force `benchit` to always rebuild your workload, regardless of whether it is available in the cache or not. + - Set `"never"` to make sure `benchit` never rebuilds your workload, even if it is stale. `benchit`will attempt to load any previously built workload in the cache, however there is no guarantee it will be functional or correct. Also available as API arguments: - `benchmark_script(rebuild=...)` -- `benchmark_model(rebuild=...)` +- `benchmark_workload(rebuild=...)` > See the [GroqFlow rebuild examples](https://github.com/groq/groqflow/tree/main/examples) to learn more. ### Sequence File -Replaces the default build sequence in `benchmark_model()` with a custom build sequence, defined in a Python script. +Replaces the default build sequence in `benchmark_workload()` with a custom build sequence, defined in a Python script. Usage: - `benchit benchmark INPUT_FILES --sequence-file FILE` @@ -433,9 +439,9 @@ This script must define a function, `get_sequence()`, that returns an instance o Also available as API arguments: - `benchmark_script(sequence=...)` -- `benchmark_model(sequence=...)` +- `benchmark_workload(sequence=...)` -> _Note_: the `sequence` argument to `benchmark_script()` can be either a sequence file or a `Sequence` instance. The `sequence` argument to `benchmark_model()` must be a `Sequence` instance. +> _Note_: the `sequence` argument to `benchmark_script()` can be either a sequence file or a `Sequence` instance. The `sequence` argument to `benchmark_workload()` must be a `Sequence` instance. > See the [Sequence File tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/build.md#sequence-file) for a detailed example. @@ -454,7 +460,7 @@ Also available as an API argument: ### Maximum Analysis Depth -Depth of sub-models to inspect within the script. Default value is 0, indicating to only analyze models at the top level of the script. Depth of 1 would indicate to analyze the first level of sub-models within the top-level models. 
+Depth of sub-workloads to inspect within the script. Default value is 0, indicating to only analyze workloads at the top level of the script. Depth of 1 would indicate to analyze the first level of sub-workloads within the top-level workloads. Usage: - `benchit benchmark INPUT_FILES --max-depth DEPTH` @@ -475,17 +481,17 @@ Usage: Also available as API arguments: - `benchmark_script(onnx_opset=...)` -- `benchmark_models(onnx_opset=...)` +- `benchmark_workload(onnx_opset=...)` > _Note_: ONNX opset can also be set by an environment variable. The --onnx-opset argument takes precedence over the environment variable. See [MLAGILITY_ONNX_OPSET](#set-the-onnx-opset). ### Analyze Only -Instruct `benchit` or `benchmark_model()` to only run the [Analysis](#analysis) phase of the `benchmark` command. +Instruct `benchit` or `benchmark_workload()` to only run the [Analysis](#analysis) phase of the `benchmark` command. Usage: - `benchit benchmark INPUT_FILES --analyze-only` - - This discovers models within the input script and prints information about them, but does not perform any build or benchmarking. + - This discovers workloads within the input script and prints information about them, but does not perform any build or benchmarking. > _Note_: any build- or benchmark-specific options will be ignored, such as `--backend`, `--device`, `--groqview`, etc. Also available as an API argument: @@ -496,33 +502,33 @@ Also available as an API argument: ### Build Only -Instruct `benchit`, `benchmark_script()`, or `benchmark_model()` to only run the [Analysis](#analysis) and [Build](#build) phases of the `benchmark` command. +Instruct `benchit`, `benchmark_script()`, or `benchmark_workload()` to only run the [Analysis](#analysis) and [Build](#build) phases of the `benchmark` command. Usage: - `benchit benchmark INPUT_FILES --build-only` - - This builds the models within the input script, however does not run any benchmark. + - This builds the workloads within the input script, but does not run any benchmark. > _Note_: any benchmark-specific options will be ignored, such as `--backend`.
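For instance, a minimal sketch of the equivalent call through the API, using the `build_only` flag listed just below and a placeholder workload:

```python
import torch
from mlagility import benchmark_workload

# Placeholder workload; build_only=True runs Analysis and Build, then stops
# before any benchmarking takes place.
model = torch.nn.Linear(10, 5)
inputs = {"input": torch.rand(1, 10)}
benchmark_workload(model, inputs, build_only=True)
```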
Also available as API arguments: - `benchmark_script(export_only=True/False)` (default False) -- `benchmark_model(export_only=True/False)` (default False) +- `benchmark_workload(export_only=True/False)` (default False) ### Resume @@ -532,7 +538,7 @@ For example: - `benchit benchmark INPUT_FILES` will benchmark everything in `INPUT_FILES`, regardless of whether benchmarking those scripts has been attempted previously. - `benchit benchmark INPUT_FILES --resume` will benchmark everything `INPUT_FILES` that has not been previously attempted. -The `--resume` behavior is useful for when you are benchmarking a large corpus of models, and one of the models crashes your run. If you repeat the same command, but with the `--resume` argument, then the new run will pick up where the last run left off, including skipping over any input scripts that crashed previously. +The `--resume` behavior is useful for when you are benchmarking a large corpus of workloads, and one of the workloads crashes your run. If you repeat the same command, but with the `--resume` argument, then the new run will pick up where the last run left off, including skipping over any input scripts that crashed previously. > _Note_: if `--resume` is skipping over any input scripts that you *do* want to evaluate, you have two options: > - Manually build the input script with `benchit benchmark INPUT_SCRIPT` without setting `--resume` @@ -545,13 +551,13 @@ Also available as an API argument: The following options are specific to Groq builds and benchmarks, and are passed into the [GroqFlow build tool](https://github.com/groq/groqflow). Learn more about them in the [GroqFlow user guide](https://github.com/groq/groqflow/blob/main/docs/user_guide.md). - `--groq-compiler-flags COMPILER_FLAGS [COMPILER_FLAGS ...]` Sets the groqit(compiler_flags=...) arg within the GroqFlow build tool (default behavior is to use groqit()'s default compiler flags) - - Also available as API arguments: `benchmark_script(groq_compiler_flags=...)`, `benchmark_model(groq_compiler_flags=...)`. + - Also available as API arguments: `benchmark_script(groq_compiler_flags=...)`, `benchmark_workload(groq_compiler_flags=...)`. - `--groq-assembler-flags ASSEMBLER_FLAGS [ASSEMBLER_FLAGS ...]` Sets the groqit(assembler_flags=...) arg within the GroqFlow build tool (default behavior is to use groqit()'s default assembler flags) - - Also available as API arguments: `benchmark_script(groq_assembler_flags=...)`, `benchmark_model(groq_assembler_flags=...)`. + - Also available as API arguments: `benchmark_script(groq_assembler_flags=...)`, `benchmark_workload(groq_assembler_flags=...)`. - `--groq-num-chips NUM_CHIPS` Sets the groqit(num_chips=...) arg (default behavior is to let groqit() automatically select the number of chips) - - Also available as API arguments: `benchmark_script(groq_num_chips=...)`, `benchmark_model(groq_num_chips=...)`. + - Also available as API arguments: `benchmark_script(groq_num_chips=...)`, `benchmark_workload(groq_num_chips=...)`. - `--groqview` Enables GroqView for the build(s) - - Also available as API arguments: `benchmark_script(groqview=True/False,)`, `benchmark_model(groqview=True/False,)`. + - Also available as API arguments: `benchmark_script(groqview=True/False,)`, `benchmark_workload(groqview=True/False,)`. ## Cache Commands @@ -652,7 +658,7 @@ export MLAGILITY_CACHE_DIR=~/a_different_cache_dir ### Show Traceback -By default, `benchit` and `benchmark_script()` will display the traceback for any exceptions caught during model build. 
However, you may sometimes want a cleaner output on your terminal. To accomplish this, set the `MLAGILITY_TRACEBACK` environment variable to `False`, which will catch any exceptions during model build and benchmark and display a simple error message like `Status: Unknown benchit error: {e}`. +By default, `benchit` and `benchmark_script()` will display the traceback for any exceptions caught during workload build. However, you may sometimes want a cleaner output on your terminal. To accomplish this, set the `MLAGILITY_TRACEBACK` environment variable to `False`, which will catch any exceptions during workload build and benchmark and display a simple error message like `Status: Unknown benchit error: {e}`. For example: @@ -662,7 +668,7 @@ export MLAGILITY_TRACEBACK=False ### Preserve Terminal Outputs -By default, `benchit` and `benchmark_script()` will erase the contents of the terminal in order to present a clean status update for each script and model evaluated. +By default, `benchit` and `benchmark_script()` will erase the contents of the terminal in order to present a clean status update for each script and workload evaluated. However, you may want to see everything that is being printed to the terminal. You can accomplish this by setting the `MLAGILITY_DEBUG` environment variable to `True`. For example: @@ -672,7 +678,7 @@ export MLAGILITY_DEBUG=True ### Set the ONNX Opset -By default, `benchit`, `benchmark_script()`, and `benchmark_model()` will use the default ONNX opset defined in `onnxflow.common.build.DEFAULT_ONNX_OPSET`. You can set a different default ONNX opset by setting the `MLAGILITY_ONNX_OPSET` environment variable. +By default, `benchit`, `benchmark_script()`, and `benchmark_workload()` will use the default ONNX opset defined in `onnxflow.common.build.DEFAULT_ONNX_OPSET`. You can set a different default ONNX opset by setting the `MLAGILITY_ONNX_OPSET` environment variable. For example: diff --git a/examples/cli/build.md b/examples/cli/build.md index c52b8181..bcd07060 100644 --- a/examples/cli/build.md +++ b/examples/cli/build.md @@ -6,7 +6,7 @@ This chapter of the `benchit` CLI tutorial focuses on techniques to customize th The tutorial chapters are: 1. [Getting Started](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md) -1. [Guiding Model Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the model discovery process to help streamline your workflow. +1. [Guiding Workload Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the workload discovery process to help streamline your workflow. 1. [Working with the Cache](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md): `benchit` arguments and commands that help you understand, inspect, and manipulate the `mlagility cache`. 1. Customizing Builds (this document): `benchit` arguments that customize build behavior to unlock new workflows. @@ -16,7 +16,7 @@ All of the tutorials assume that your current working directory is in the same l ## Build Only -`benchit` provides the `--build-only` argument for when you want to analyze and build the models in a script, without actually benchmarking them. +`benchit` provides the `--build-only` argument for when you want to analyze and build the workloads in a script, without actually benchmarking them. 
You can try it out with this command: @@ -43,7 +43,7 @@ pytorch_outputs: tensor([-0.1675, 0.1548, -0.1627, 0.0067, 0.3353], grad_fn=< Woohoo! The 'benchmark' command is complete. ``` -You can see that the model is discovered and built, but no benchmark took place. +You can see that the workload is discovered and built, but no benchmark took place. > See the [Build Only documentation](https://github.com/groq/mlagility/blob/main/docs/tools_user_guide.md#build-only) for more details. @@ -51,7 +51,7 @@ You can see that the model is discovered and built, but no benchmark took place. You can customize the behavior of the [Build](https://github.com/groq/mlagility/blob/main/docs/tools_user_guide.md#build) stage of `benchit` by creating a custom `Sequence`. -A `Sequence` tells the `benchmark_model()` API within `benchit` how to `build` a model to prepare it for benchmarking. +A `Sequence` tells the `benchmark_workload()` API within `benchit` how to `build` a model to prepare it for benchmarking. The default `Sequence` for CPU and GPU benchmarking performs the following build steps: 1. Export the model to an ONNX file @@ -97,6 +97,6 @@ If we then repeat the `benchit cache stats hello_world_479b1332` we will see tha Now that you have completed this tutorial, make sure to check out the other tutorials if you want to learn more: 1. [Getting Started](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md) -1. [Guiding Model Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the model discovery process to help streamline your workflow. +1. [Guiding Workload Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the workload discovery process to help streamline your workflow. 1. [Working with the Cache](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md): `benchit` arguments and commands that help you understand, inspect, and manipulate the `mlagility cache`. 1. Customizing Builds (this document): `benchit` arguments that customize build behavior to unlock new workflows. \ No newline at end of file diff --git a/examples/cli/cache.md b/examples/cli/cache.md index e1fc841e..30393230 100644 --- a/examples/cli/cache.md +++ b/examples/cli/cache.md @@ -10,7 +10,7 @@ This chapter of the `benchit` CLI tutorials is focused on understanding, inspect The tutorial chapters are: 1. [Getting Started](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md) -1. [Guiding Model Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the model discovery process to help streamline your workflow. +1. [Guiding Workload Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the workload discovery process to help streamline your workflow. 1. Working with the Cache (this document): `benchit` arguments and commands that help you understand, inspect, and manipulate the `mlagility cache`. 1. [Customizing Builds](https://github.com/groq/mlagility/blob/main/examples/cli/build.md): `benchit` arguments that customize build behavior to unlock new workflows. @@ -191,6 +191,6 @@ total 20K Now that you have completed this tutorial, make sure to check out the other tutorials if you want to learn more: 1. [Getting Started](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md) -1. 
[Guiding Model Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the model discovery process to help streamline your workflow. +1. [Guiding Workload Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the workload discovery process to help streamline your workflow. 1. Working with the Cache (this document): `benchit` arguments and commands that help you understand, inspect, and manipulate the `mlagility cache`. 1. [Customizing Builds](https://github.com/groq/mlagility/blob/main/examples/cli/build.md): `benchit` arguments that customize build behavior to unlock new workflows. \ No newline at end of file diff --git a/examples/cli/discovery.md b/examples/cli/discovery.md index 8efe2e56..2e9ba495 100644 --- a/examples/cli/discovery.md +++ b/examples/cli/discovery.md @@ -1,24 +1,24 @@ -# Guiding Model Discovery +# Guiding Workload Discovery -This chapter of the `benchit` CLI tutorial is focused on how to guide the tool as it discovers models. You will learn things such as: -- [How to run model discovery, without spending time on builds or benchmarking](#analyze-only) -- [How to benchmark all the models in all the scripts in a directory](#benchmark-multiple-scripts) -- [How to analyze the building blocks of a model](#maximum-analysis-depth) -- [How to filter which models are passed to the build and benchmark operations](#filtering-model-hashes) +This chapter of the `benchit` CLI tutorial is focused on how to guide the tool as it discovers workloads. You will learn things such as: +- [How to run workload discovery, without spending time on builds or benchmarking](#analyze-only) +- [How to benchmark all the workloads in all the scripts in a directory](#benchmark-multiple-scripts) +- [How to analyze the building blocks of a workload](#maximum-analysis-depth) +- [How to filter which workloads are passed to the build and benchmark operations](#filtering-workload-hashes) The tutorial chapters are: 1. [Getting Started](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md) -1. Guiding Model Discovery (this document): `benchit` arguments that customize the model discovery process to help streamline your workflow. +1. Guiding Workload Discovery (this document): `benchit` arguments that customize the workload discovery process to help streamline your workflow. 1. [Working with the Cache](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md): `benchit` arguments and commands that help you understand, inspect, and manipulate the `mlagility cache`. 1. [Customizing Builds](https://github.com/groq/mlagility/blob/main/examples/cli/build.md): `benchit` arguments that customize build behavior to unlock new workflows. -# Model Discovery Tutorials +# Workload Discovery Tutorials All of the tutorials assume that your current working directory is in the same location as this readme file (`examples/cli`). ## Analyze Only -`benchit` provides the `--analyze-only` argument for when you want to analyze the models in a script, without actually building or benchmarking them. +`benchit` provides the `--analyze-only` argument for when you want to analyze the workloads in a script, without actually building or benchmarking them. 
You can try it out with this command: @@ -29,7 +29,7 @@ benchit benchmark scripts/hello_world.py --analyze-only Which gives a result like: ``` -Models discovered during profiling: +Workloads discovered during profiling: hello_world.py: pytorch_model (executed 1x - 0.00s) @@ -44,13 +44,13 @@ pytorch_outputs: tensor([-0.1675, 0.1548, -0.1627, 0.0067, 0.3353], grad_fn=< Woohoo! The 'benchmark' command is complete. ``` -You can see that the model is discovered, and some stats are printed, but no build or benchmark took place. +You can see that the workload is discovered, and some stats are printed, but no build or benchmark took place. > See the [Analyze Only documentation](https://github.com/groq/mlagility/blob/main/docs/tools_user_guide.md#analyze-only) for more details. ## Benchmark Multiple Scripts -If you want to benchmark an entire corpus of models, but you don't want to call `benchit` individually on each model you may provide more than one python file to benchit at a time. +If you want to benchmark an entire corpus of models, but you don't want to call `benchit` individually on each python file you may provide more than one python file to benchit at a time. For example, the command: @@ -64,11 +64,11 @@ or the command benchit scripts/*.py ``` -Will iterate over every model in every script in the `scripts` directory, producing a result like this: +Will iterate over every workload in every script in the `scripts` directory, producing a result like this: ``` -Models discovered during profiling: +Workloads discovered during profiling: hello_world.py: pytorch_model (executed 1x) @@ -77,7 +77,7 @@ hello_world.py: Location: /home/jfowers/mlagility/examples/cli/scripts/hello_world.py, line 29 Parameters: 55 (<0.1 MB) Hash: 479b1332 - Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 657792.2 inferences per second (IPS) @@ -88,7 +88,7 @@ two_models.py: Location: /home/jfowers/mlagility/examples/cli/scripts/two_models.py, line 40 Parameters: 510 (<0.1 MB) Hash: 215ca1e3 - Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 509528.6 inferences per second (IPS) @@ -99,7 +99,7 @@ max_depth.py: Location: /home/jfowers/mlagility/examples/cli/scripts/max_depth.py, line 41 Parameters: 85 (<0.1 MB) Hash: 80b93950 - Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 693955.3 inferences per second (IPS) @@ -135,7 +135,7 @@ benchit benchmark scripts/max_depth.py --max-depth 1 You get a result like: ``` -Models discovered during profiling: +Workloads discovered during profiling: max_depth.py: pytorch_model (executed 1x) @@ -144,7 +144,7 @@ max_depth.py: Location: /home/jfowers/mlagility/examples/cli/scripts/max_depth.py, line 41 Parameters: 85 (<0.1 MB) Hash: 80b93950 - Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 533884.4 inferences per second (IPS) @@ -154,7 +154,7 @@ max_depth.py: Class: Linear () Parameters: 55 (<0.1 MB) Hash: 6d5eb4ee - Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Successfully benchmarked 
on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 809701.4 inferences per second (IPS) @@ -163,7 +163,7 @@ max_depth.py: Class: Linear () Parameters: 30 (<0.1 MB) Hash: d4b2ffa7 - Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 677945.2 inferences per second (IPS) ``` @@ -174,22 +174,22 @@ You can see that the two instances of `torch.nn.Linear`, `fc` and `fc2`, are ben -## Filtering Model Hashes +## Filtering Workload Hashes -When you ran the example from the [Multiple Models per Script](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md#multiple-models-per-script) tutorial, you saw that `benchit` discovered, built, and benchmarked two models. What if you only wanted to build and benchmark one of the models? +When you ran the example from the [Multiple Workloads per Script](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md#multiple-workloads-per-script) tutorial, you saw that `benchit` discovered, built, and benchmarked two workloads. What if you only wanted to build and benchmark one of the workloads? -You can leverage the model hashes feature of `benchit` to filter which models are acted on. You can see in the result from [Multiple Models per Script](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md#multiple-models-per-script) that the two models, `pytorch_model` and `another_pytorch_model`, have hashes `479b1332` and `215ca1e3`, respectively. +You can leverage the workload hashes feature of `benchit` to filter which workloads are acted on. You can see in the result from [Multiple Workloads per Script](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md#multiple-workloads-per-script) that the two workloads, `pytorch_model` and `another_pytorch_model`, have hashes `f93db89f` and `c69b7dea`, respectively. -If you wanted to only build and benchmark `another_pytorch_model`, you could use this command, which filters `two_models.py` with the hash `215ca1e3`: +If you wanted to only build and benchmark `another_pytorch_model`, you could use this command, which filters `two_models.py` with the hash `c69b7dea`: ``` -benchit benchmark scripts/two_models.py::215ca1e3 +benchit benchmark scripts/two_models.py::c69b7dea ``` That would produce a result like: ``` -Models discovered during profiling: +Workloads discovered during profiling: two_models.py: pytorch_model (executed 1x) @@ -197,15 +197,15 @@ two_models.py: Class: SmallModel () Location: /home/jfowers/mlagility/examples/cli/scripts/two_models.py, line 32 Parameters: 55 (<0.1 MB) - Hash: 479b1332 + Hash: f93db89f another_pytorch_model (executed 1x) Model Type: Pytorch (torch.nn.Module) Class: SmallModel () Location: /home/jfowers/mlagility/examples/cli/scripts/two_models.py, line 40 Parameters: 510 (<0.1 MB) - Hash: 215ca1e3 - Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Hash: c69b7dea + Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 499272.2 inferences per second (IPS) @@ -216,15 +216,15 @@ more_pytorch_outputs: tensor([-0.1198, -0.5344, -0.1920, -0.1565, 0.2279, 0.69 Woohoo! The 'benchmark' command is complete. ``` -You can see that both models are discovered, but only `another_pytorch_model` was built and benchmarked. 
+You can see that both workloads are discovered, but only `another_pytorch_model` was built and benchmarked. > See the [Input Script documentation](https://github.com/groq/mlagility/blob/main/docs/tools_user_guide.md#input-script) for more details. -## Filtering Model Labels +## Filtering Script Labels -You can also leverage the labels feature of `benchit` to filter which models are acted on. Labels are pragmas added by the user to the first line of a `.py` file to list some of the attributes of that given script. `hello_world.py`, for example has the label `test_group::a`, while `two_models.py` and `max_depth.py` have the label `test_group::b`. +You can also leverage the labels feature of `benchit` to filter which scripts are acted on. Labels are pragmas added by the user to the first line of a `.py` file to list some of the attributes of that given script. `hello_world.py`, for example has the label `test_group::a`, while `two_models.py` and `max_depth.py` have the label `test_group::b`. -If you wanted to only build and benchmark models that have the label `test_group::a`, you could use the command: +If you wanted to only build and benchmark scripts that have the label `test_group::a`, you could use the command: ``` benchit scripts/*.py --labels test_group::a @@ -233,7 +233,7 @@ benchit scripts/*.py --labels test_group::a That would produce a result like: ``` -Models discovered during profiling: +Workloads discovered during profiling: hello_world.py: pytorch_model (executed 1x) @@ -242,7 +242,7 @@ hello_world.py: Location: /net/home/dhnoronha/mlagility/examples/cli/scripts/hello_world.py, line 30 Parameters: 55 (<0.1 MB) Hash: 479b1332 - Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 490444.1 inferences per second (IPS) @@ -255,6 +255,6 @@ Woohoo! The 'benchmark' command is complete. Now that you have completed this tutorial, make sure to check out the other tutorials if you want to learn more: 1. [Getting Started](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md) -1. Guiding Model Discovery (this document): `benchit` arguments that customize the model discovery process to help streamline your workflow. +1. Guiding Workload Discovery (this document): `benchit` arguments that customize the model discovery process to help streamline your workflow. 1. [Working with the Cache](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md): `benchit` arguments and commands that help you understand, inspect, and manipulate the `mlagility cache`. 1. [Customizing Builds](https://github.com/groq/mlagility/blob/main/examples/cli/build.md): `benchit` arguments that customize build behavior to unlock new workflows. diff --git a/examples/cli/extras/example_sequence.py b/examples/cli/extras/example_sequence.py index 0af4fe28..946da4ea 100644 --- a/examples/cli/extras/example_sequence.py +++ b/examples/cli/extras/example_sequence.py @@ -1,7 +1,7 @@ """ This script is an example of a sequence.py file. Such a sequence.py file can be used to redefine the build phase of the benchit CLI, benchmark_script(), -and benchmark_model() to have any custom behavior. +and benchmark_workload() to have any custom behavior. In this example sequence.py file we are setting the build sequence to simply export from pytorch to ONNX. 
This differs from the default build sequence, which diff --git a/examples/cli/readme.md b/examples/cli/readme.md index 6b833ea3..69400043 100644 --- a/examples/cli/readme.md +++ b/examples/cli/readme.md @@ -8,7 +8,7 @@ Once you've familiarized yourself with these features, head over to the [`models The tutorials are organized into a few chapters: 1. Getting Started (this document) -1. [Guiding Model Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the model discovery process to help streamline your workflow. +1. [Guiding Workload Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the model discovery process to help streamline your workflow. 1. [Working with the Cache](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md): `benchit` arguments and commands that help you understand, inspect, and manipulate the `mlagility cache`. 1. [Customizing Builds](https://github.com/groq/mlagility/blob/main/examples/cli/build.md): `benchit` arguments that customize build behavior to unlock new workflows. @@ -16,7 +16,7 @@ In this tutorial you will learn things such as: - [How to benchmark BERT with one command](#just-benchmark-bert) - [A "hello world" example, which is the easiest way to get started](#hello-world) - [Benchmarking on Nvidia GPUs](#nvidia-benchmarking) -- [Working with scripts that invoke more than one model](#multiple-models-per-script) +- [Working with scripts that invoke more than one workload](#multiple-workloads-per-script) - [Benchmarking an ONNX file](#onnx-benchmarking) # Just Benchmark BERT @@ -33,7 +33,7 @@ benchit $models/transformers/bert.py This will produce a result that looks like this, which shows you the performance of BERT-Base on your CPU: ``` -Models discovered during profiling: +Workloads discovered during profiling: bert.py: model (executed 1x) @@ -66,14 +66,14 @@ That commands `benchit` benchmark `hello_world.py` on your CPU. Specifically, `b 1. Pass `scripts/hello_world.py` as the input_script to the `benchmark` command of `benchit`. - _Note_: `benchit <.py file>` is a shorthand for `benchit benchmark <.py file>`. 1. Run `hello_world.py` against a profiler and look for models from supported machine learning frameworks (e.g. Pytorch). -1. Discover the `pytorch_model` instance of class `SmallModel`, which is a PyTorch model, and print some statistics about it. -1. Export `pytorch_model` to an ONNX file, optimize that ONNX file, and convert it to the `float16` data type. -1. Measure the performance of the ONNX file on your x86 CPU and report the `mean latency` and `throughput`. +2. Discover the `pytorch_model` instance of class `SmallModel` and print some statistics about it. +3. Export `pytorch_model` to an ONNX file, optimize that ONNX file, and convert it to the `float16` data type. +4. Measure the performance of the ONNX file on your x86 CPU and report the `mean latency` and `throughput`. 
The result looks like this: ``` -Models discovered during profiling: +Workloads discovered during profiling: hello_world.py: pytorch_model (executed 1x) @@ -82,7 +82,7 @@ hello_world.py: Location: /home/jfowers/mlagility/examples/cli/hello_world.py, line 29 Parameters: 55 (<0.1 MB) Hash: 479b1332 - Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.001 milliseconds (ms) Throughput: 185964.8 inferences per second (IPS) @@ -106,7 +106,7 @@ benchit scripts/hello_world.py --device nvidia To get a result like this: ``` -Models discovered during profiling: +Workloads discovered during profiling: hello_world.py: pytorch_model (executed 1x) @@ -115,7 +115,7 @@ hello_world.py: Location: /home/jfowers/mlagility/examples/cli/hello_world.py, line 29 Parameters: 55 (<0.1 MB) Hash: 479b1332 - Status: Model successfully benchmarked on NVIDIA A100-SXM4-40GB + Status: Successfully benchmarked on NVIDIA A100-SXM4-40GB Mean Latency: 0.027 milliseconds (ms) Throughput: 21920.5 inferences per second (IPS) @@ -126,9 +126,9 @@ Woohoo! The 'benchmark' command is complete. You can see that the device mentioned in the status is a `NVIDIA A100-SXM4-40GB`. -## Multiple Models per Script +## Multiple Workloads per Script -The MLAgility tools will benchmark all models discovered in the input script. We can demonstrate this with the `two_models.py` script. +The MLAgility tools will benchmark all workloads discovered in the input script. We can demonstrate this with the `two_models.py` script. Run the following command: @@ -139,7 +139,7 @@ benchit scripts/two_models.py To get a result like: ``` -Models discovered during profiling: +Workloads discovered during profiling: two_models.py: pytorch_model (executed 1x) @@ -148,7 +148,7 @@ two_models.py: Location: /home/jfowers/mlagility/examples/cli/scripts/two_models.py, line 32 Parameters: 55 (<0.1 MB) Hash: 479b1332 - Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 640717.1 inferences per second (IPS) @@ -158,7 +158,7 @@ two_models.py: Location: /home/jfowers/mlagility/examples/cli/scripts/two_models.py, line 40 Parameters: 510 (<0.1 MB) Hash: 215ca1e3 - Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 642021.1 inferences per second (IPS) @@ -169,7 +169,7 @@ more_pytorch_outputs: tensor([-0.1198, -0.5344, -0.1920, -0.1565, 0.2279, 0.69 Woohoo! The 'benchmark' command is complete. ``` -You can see that both model instances in `two_models.py`, `pytorch_model` and `another_pytorch_model`, are both discovered and benchmarked. +You can see that both workloads in `two_models.py`, `pytorch_model` and `another_pytorch_model`, are discovered and benchmarked. ## ONNX Benchmarking @@ -200,6 +200,6 @@ Info: Performance of build sample on x86 device Intel(R) Xeon(R) CPU @ 2.20GHz i # Thanks! Now that you have completed this tutorial, make sure to check out the other tutorials if you want to learn more: -1. [Guiding Model Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the model discovery process to help streamline your workflow. +1. 
[Guiding Workload Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the model discovery process to help streamline your workflow. 1. [Working with the Cache](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md): `benchit` arguments and commands that help you understand, inspect, and manipulate the `mlagility cache`. 1. [Customizing Builds](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md): `benchit` arguments that customize build behavior to unlock new workflows. \ No newline at end of file diff --git a/examples/model_api/hello_world.py b/examples/model_api/hello_world.py index 4553d3f2..bbd0be14 100644 --- a/examples/model_api/hello_world.py +++ b/examples/model_api/hello_world.py @@ -1,6 +1,6 @@ import argparse import torch -from mlagility import benchmark_model +from mlagility import benchmark_workload torch.manual_seed(0) @@ -56,7 +56,7 @@ def main(): # Benchmark the model on the specified device and backend print(f"Benchmarking on {args.device} {args.backend}...") - benchmark_model( + benchmark_workload( pytorch_model, inputs, build_name="hello_api_world", diff --git a/examples/readme.md b/examples/readme.md index e9e8aebb..0d05e36d 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -2,6 +2,6 @@ This directory contains examples to help you learn how to use the MLAgility tools. The examples are split up into two sub-directories: 1. `examples/cli`: a tutorial series for the `benchit` CLI. This is the recommended starting point. -1. `examples/model_api`: scripts that demonstrate how to use the `mlagility.benchmark_model()` API. +1. `examples/model_api`: scripts that demonstrate how to use the `mlagility.benchmark_workload()` API. 1. `examples/script_api`: scripts that demonstrate how to use the `mlagility.benchmark_script()` API. 1. `examples/onnxflow`: scripts that demonstrate how to use the `onnxflow.buildit()` API. diff --git a/models/llm_layer/llama_layer_prototype.py b/models/llm_layer/llama_layer_prototype.py index 6fc42795..c61c5219 100644 --- a/models/llm_layer/llama_layer_prototype.py +++ b/models/llm_layer/llama_layer_prototype.py @@ -12,10 +12,6 @@ def call_llama_layer(params="7B", use_cache=False): - # Use different torch seeds for KV caching vs. 
not, so that - # the models end up with different mlagility hashes - # Remove the if-statement when - # https://github.com/groq/mlagility/issues/316 is fixed torch.manual_seed(0) # Parsing command-line arguments diff --git a/src/mlagility/__init__.py b/src/mlagility/__init__.py index f92ad481..8c5443f7 100644 --- a/src/mlagility/__init__.py +++ b/src/mlagility/__init__.py @@ -1,5 +1,5 @@ from mlagility.version import __version__ from .api.script_api import benchmark_script -from .api.model_api import benchmark_model +from .api.model_api import benchmark_workload from .cli.cli import main as benchitcli diff --git a/src/mlagility/analysis/analysis.py b/src/mlagility/analysis/analysis.py index 125520f3..05c39406 100644 --- a/src/mlagility/analysis/analysis.py +++ b/src/mlagility/analysis/analysis.py @@ -21,7 +21,7 @@ import mlagility.analysis.util as util import mlagility.analysis.tf_helpers as tf_helpers import mlagility.common.labels as labels -from mlagility.api.model_api import benchmark_model +from mlagility.api.model_api import benchmark_workload import mlagility.common.filesystem as filesystem @@ -135,7 +135,7 @@ def explore_workload( ) workload_info.status_message_color = printing.Colors.WARNING else: - perf = benchmark_model( + perf = benchmark_workload( model_info.model, inputs, device=tracer_args.device, @@ -154,11 +154,11 @@ def explore_workload( onnx_opset=tracer_args.onnx_opset, ) if Action.BENCHMARK in tracer_args.actions: - workload_info.status_message = "Model successfully benchmarked!" + workload_info.status_message = "Successfully benchmarked!" workload_info.performance = perf workload_info.status_message_color = printing.Colors.OKGREEN else: - workload_info.status_message = "Model successfully built!" + workload_info.status_message = "Successfully built!" 
workload_info.status_message_color = printing.Colors.OKGREEN except exp.StageError: @@ -473,7 +473,7 @@ def forward_spy(*args, **kwargs): model_info.workloads[workload_hash] = util.WorkloadInfo( hash=workload_hash, is_target=workload_hash in tracer_args.targets - or tracer_args.targets == [], + or len(tracer_args.targets) == 0, input_shapes=input_shapes, parent_hash=parent_workload_hash, ) diff --git a/src/mlagility/analysis/status.py b/src/mlagility/analysis/status.py index 265d9a10..14397650 100644 --- a/src/mlagility/analysis/status.py +++ b/src/mlagility/analysis/status.py @@ -13,7 +13,7 @@ def update(models_found: Dict[str, ModelInfo]) -> None: os.system("clear") printing.logn( - "\nModels discovered during profiling:\n", + "\nWorkloads discovered during profiling:\n", c=printing.Colors.BOLD, ) recursive_print(models_found, None, None, []) diff --git a/src/mlagility/api/model_api.py b/src/mlagility/api/model_api.py index dc664019..d8e43d73 100644 --- a/src/mlagility/api/model_api.py +++ b/src/mlagility/api/model_api.py @@ -22,7 +22,7 @@ MLAGILITY_DEFAULT_REBUILD_POLICY = "if_needed" -def benchmark_model( +def benchmark_workload( model: Any, inputs: Dict[str, Any], build_name: str, diff --git a/src/mlagility/api/script_api.py b/src/mlagility/api/script_api.py index 50076a0d..502601ac 100644 --- a/src/mlagility/api/script_api.py +++ b/src/mlagility/api/script_api.py @@ -11,7 +11,7 @@ import mlagility.cli.spawn as spawn import mlagility.common.filesystem as filesystem import mlagility.common.labels as labels_library -from mlagility.api.model_api import benchmark_model +from mlagility.api.model_api import benchmark_workload from mlagility.api.devices import SUPPORTED_DEVICES, DEFAULT_RUNTIME from mlagility.analysis.analysis import ( evaluate_script, @@ -312,7 +312,7 @@ def benchmark_files( """ Inspect the input_files and sort them into .py and .onnx files. - Pass .py files into benchmark_script() and .onnx files into benchmark_model(). + Pass .py files into benchmark_script() and .onnx files into benchmark_workload(). 
""" python_scripts = [] @@ -352,7 +352,7 @@ def benchmark_files( groqview=groqview, ) - # Iterate and pass each ONNX file into benchmark_model() one at a time + # Iterate and pass each ONNX file into benchmark_workload() one at a time for onnx_file in onnx_files: build_name = filesystem.clean_script_name(onnx_file) @@ -370,7 +370,7 @@ def benchmark_files( ) for runtime in runtimes: - benchmark_model( + benchmark_workload( model=onnx_file, inputs=None, build_name=build_name, diff --git a/test/model_api.py b/test/model_api.py index 3eb6014d..72a27ad2 100644 --- a/test/model_api.py +++ b/test/model_api.py @@ -8,7 +8,7 @@ import onnxflow.common.cache as cache import onnxflow.justbuildit.export as export import onnxflow.common.build as build -from mlagility import benchmark_model +from mlagility import benchmark_workload class SmallPytorchModel(torch.nn.Module): @@ -62,7 +62,7 @@ def setUp(self) -> None: def test_001_build_pytorch_model(self): build_name = "build_pytorch_model" - benchmark_model( + benchmark_workload( pytorch_model, inputs, build_name=build_name, @@ -104,7 +104,7 @@ def fire(self, state): ], ) - benchmark_model( + benchmark_workload( pytorch_model, inputs, build_name=build_name, @@ -119,7 +119,7 @@ def fire(self, state): def test_003_local_benchmark(self): build_name = "local_benchmark" - perf = benchmark_model( + perf = benchmark_workload( pytorch_model, inputs, device="x86", @@ -147,7 +147,7 @@ def test_004_onnx_opset(self): user_opset = 15 assert user_opset != build.DEFAULT_ONNX_OPSET - perf = benchmark_model( + perf = benchmark_workload( pytorch_model, inputs, device="x86", From 60862bd58d1bef87fc7f3b5c3e66f8e81f8616bf Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Wed, 21 Jun 2023 11:09:43 -0700 Subject: [PATCH 25/35] Fix CI --- test/analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/analysis.py b/test/analysis.py index 53171c64..d7d991c1 100644 --- a/test/analysis.py +++ b/test/analysis.py @@ -201,9 +201,9 @@ def run_analysis(args): output = run_cli(args) # Process outputs - output = output[output.rfind("Models discovered") :] + output = output[output.rfind("Workloads discovered") :] models_executed = output.count("(executed") - models_built = output.count("Model successfully built!") + models_built = output.count("Successfully built!") return models_executed, 0, models_built From cdfef11ed788e283c7de207b7f851feb112e220b Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Wed, 21 Jun 2023 11:31:40 -0700 Subject: [PATCH 26/35] Fix slurm CI --- test/helpers/check_slurm_output.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/helpers/check_slurm_output.sh b/test/helpers/check_slurm_output.sh index f2431e46..350b2ff8 100755 --- a/test/helpers/check_slurm_output.sh +++ b/test/helpers/check_slurm_output.sh @@ -1,6 +1,6 @@ # Checks whether a slurm output contains any errors SLURM_OUTPUT="$1" -if ! grep -q "Model successfully built!" $SLURM_OUTPUT +if ! grep -q "Successfully built!" $SLURM_OUTPUT then cat $SLURM_OUTPUT echo "Model has not been successfully built" From 328938f32549a4756720c8f703d7526cd4a206c7 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Wed, 21 Jun 2023 13:56:08 -0700 Subject: [PATCH 27/35] Revert "Fix slurm CI" This reverts commit cdfef11ed788e283c7de207b7f851feb112e220b. 
--- test/helpers/check_slurm_output.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/helpers/check_slurm_output.sh b/test/helpers/check_slurm_output.sh index 350b2ff8..f2431e46 100755 --- a/test/helpers/check_slurm_output.sh +++ b/test/helpers/check_slurm_output.sh @@ -1,6 +1,6 @@ # Checks whether a slurm output contains any errors SLURM_OUTPUT="$1" -if ! grep -q "Successfully built!" $SLURM_OUTPUT +if ! grep -q "Model successfully built!" $SLURM_OUTPUT then cat $SLURM_OUTPUT echo "Model has not been successfully built" From 7c6668d4ed15a1fdb9d97938fa1b0788c1ab7ac2 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Wed, 21 Jun 2023 13:56:20 -0700 Subject: [PATCH 28/35] Revert "Fix CI" This reverts commit 60862bd58d1bef87fc7f3b5c3e66f8e81f8616bf. --- test/analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/analysis.py b/test/analysis.py index d7d991c1..53171c64 100644 --- a/test/analysis.py +++ b/test/analysis.py @@ -201,9 +201,9 @@ def run_analysis(args): output = run_cli(args) # Process outputs - output = output[output.rfind("Workloads discovered") :] + output = output[output.rfind("Models discovered") :] models_executed = output.count("(executed") - models_built = output.count("Successfully built!") + models_built = output.count("Model successfully built!") return models_executed, 0, models_built From 265151241e34bd74d81fdfe4619f6bccf08a9dd0 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Wed, 21 Jun 2023 13:56:23 -0700 Subject: [PATCH 29/35] Revert "Change model to workload where appropriate" This reverts commit b4b6371758e074c9452e19530185dfa0c45d0c93. --- docs/code.md | 8 +- docs/readme.md | 2 +- docs/tools_user_guide.md | 148 +++++++++++----------- examples/cli/build.md | 10 +- examples/cli/cache.md | 4 +- examples/cli/discovery.md | 74 +++++------ examples/cli/extras/example_sequence.py | 2 +- examples/cli/readme.md | 34 ++--- examples/model_api/hello_world.py | 4 +- examples/readme.md | 2 +- models/llm_layer/llama_layer_prototype.py | 4 + src/mlagility/__init__.py | 2 +- src/mlagility/analysis/analysis.py | 10 +- src/mlagility/analysis/status.py | 2 +- src/mlagility/api/model_api.py | 2 +- src/mlagility/api/script_api.py | 8 +- test/model_api.py | 10 +- 17 files changed, 162 insertions(+), 164 deletions(-) diff --git a/docs/code.md b/docs/code.md index c7c0daa3..b18daf26 100644 --- a/docs/code.md +++ b/docs/code.md @@ -11,7 +11,7 @@ The MLAgility repository has a few major top-level directories: - `models`: the corpora of models that makes up the MLAgility benchmark (see [the models readme](https://github.com/groq/mlagility/blob/main/models/readme.md)). - Each subdirectory under `models` represents a corpus of models pulled from somewhere on the internet. For example, `models/torch_hub` is a corpus of models from [Torch Hub](https://github.com/pytorch/hub). - `src/mlagility`: source code for the MLAgility benchmarking tools (see [Benchmarking Tools](#benchmarking-tools) for a description of how the code is used). - - `src/mlagility/analysis`: functions for profiling a model script, discovering model instances, and invoking `benchmark_workload()` on those instances. + - `src/mlagility/analysis`: functions for profiling a model script, discovering model instances, and invoking `benchmark_model()` on those instances. - `src/mlagility/api`: implements the benchmarking APIs. - `src/mlagility/cli`: implements the `benchit` CLI. - `src/mlagility/common`: functions common to the other modules. 
@@ -29,10 +29,10 @@ MLAgility provides two main tools, the `benchit` CLI and benchmarking APIs. Inst 1. The default command for `benchit` CLI runs the `benchmark_script()` API, which is implemented in [api/script_api.py](https://github.com/groq/mlagility/blob/main/src/mlagility/api/script_api.py). - Other CLI commands are also implemented in `cli/`, for example the `report` command is implemented in `cli/report.py`. 1. The `benchmark_script()` API takes in a set of scripts, each of which should invoke at least one model instance, to evaluate and passes each into the `evaluate_script()` function for analysis, which is implemented in [analysis/analysis.py](https://github.com/groq/mlagility/blob/main/src/mlagility/analysis/analysis.py). -1. `evaluate_script()` uses a profiler to discover the model instances in the script, and passes each into the `benchmark_workload()` API, which is defined in [api/model_api.py](https://github.com/groq/mlagility/blob/main/src/mlagility/api/model_api.py). -1. The `benchmark_workload()` API prepares the model for benchmarking (e.g., exporting and optimizing an ONNX file), which creates an instance of a `*Model` class, where `*` can be CPU, GPU, etc. The `*Model` classes are defined in [api/ortmodel.py](https://github.com/groq/mlagility/blob/main/src/mlagility/api/ortmodel.py), [api/trtmodel.py](https://github.com/groq/mlagility/blob/main/src/mlagility/api/trtmodel.py), etc. +1. `evaluate_script()` uses a profiler to discover the model instances in the script, and passes each into the `benchmark_model()` API, which is defined in [api/model_api.py](https://github.com/groq/mlagility/blob/main/src/mlagility/api/model_api.py). +1. The `benchmark_model()` API prepares the model for benchmarking (e.g., exporting and optimizing an ONNX file), which creates an instance of a `*Model` class, where `*` can be CPU, GPU, etc. The `*Model` classes are defined in [api/ortmodel.py](https://github.com/groq/mlagility/blob/main/src/mlagility/api/ortmodel.py), [api/trtmodel.py](https://github.com/groq/mlagility/blob/main/src/mlagility/api/trtmodel.py), etc. 1. The `*Model` classes provide a `.benchmark()` method that benchmarks the model on the device and returns an instance of the `MeasuredPerformance` class, which includes the performance statistics acquired during benchmarking. -1. `benchmark_workload()` and the `*Model` classes are built using [`onnxflow.build_model()`](#model-build-tool) +1. `benchmark_model()` and the `*Model` classes are built using [`onnxflow.build_model()`](#model-build-tool) # Model Build Tool diff --git a/docs/readme.md b/docs/readme.md index d9f0326a..42dbaf72 100644 --- a/docs/readme.md +++ b/docs/readme.md @@ -3,7 +3,7 @@ This directory contains documentation for the MLAgility project: - [code.md](https://github.com/groq/mlagility/blob/main/docs/code.md): Code organization for the benchmark and tools. - [install.md](https://github.com/groq/mlagility/blob/main/docs/install.md): Installation instructions for the MLAgility project. -- [tools_user_guide.md](https://github.com/groq/mlagility/blob/main/docs/tools_user_guide.md): User guide for the MLAgility benchmarking tools: `benchit` CLI, `benchmark_script()`, and `benchmark_workload()`. +- [tools_user_guide.md](https://github.com/groq/mlagility/blob/main/docs/tools_user_guide.md): User guide for the MLAgility benchmarking tools: `benchit` CLI, `benchmark_script()`, and `benchmark_model()`. 
- [versioning.md](https://github.com/groq/mlagility/blob/main/docs/versioning.md): Defines the semantic versioning rules for the `mlagility` package. There is more useful documentation avialable in: diff --git a/docs/tools_user_guide.md b/docs/tools_user_guide.md index d5a352ac..09916eff 100644 --- a/docs/tools_user_guide.md +++ b/docs/tools_user_guide.md @@ -51,9 +51,9 @@ Example Output: Where `your_script.py` is a Python script that instantiates and executes a PyTorch model named `YourModel`. The benchmarking results are also saved to a `build directory` in the `MLAgility cache` (see [Build](#build)). The `benchit` CLI performs the following steps: -1. [Analysis](#analysis): profile the Python script to identify the PyTorch workloads within -2. [Build](#build): call the `benchmark_script()` [API](#the-mlagility-api) to prepare each workload for benchmarking -3. [Benchmark](#benchmark): call the `benchmark_workload()` [API](#the-mlagility-api) on each workload to gather performance statistics +1. [Analysis](#analysis): profile the Python script to identify the PyTorch models within +2. [Build](#build): call the `benchmark_script()` [API](#the-mlagility-api) to prepare each model for benchmarking +3. [Benchmark](#benchmark): call the `benchmark_model()` [API](#the-mlagility-api) on each model to gather performance statistics _Note_: The benchmarking methodology is defined [here](#benchmark). If you are looking for more detailed instructions on how to install mlagility, you can find that [here](https://github.com/groq/mlagility/blob/main/docs/install.md). @@ -65,19 +65,19 @@ _Note_: The benchmarking methodology is defined [here](#benchmark). If you are l Most of the functionality provided by the `benchit` CLI is also available in the MLAgility API: - `mlagility.benchmark_script()` provides the same benchmarking functionality as the `benchit` CLI: it takes a script and target device, and returns performance results. -- `mlagility.benchmark_workload()` provides a subset of this functionality: it takes a model and its inputs, and returns performance results. - - The main difference is that `benchmark_workload()` does not include the [Analysis](#analysis) feature, and `benchmark_script()` does. +- `mlagility.benchmark_model()` provides a subset of this functionality: it takes a model and its inputs, and returns performance results. + - The main difference is that `benchmark_model()` does not include the [Analysis](#analysis) feature, and `benchmark_script()` does. -Generally speaking, the `benchit` CLI is a command line interface for the `benchmark_script()` API, which internally calls `benchmark_workload()`. You can read more about this code organization [here](https://github.com/groq/mlagility/blob/main/docs/code.md). +Generally speaking, the `benchit` CLI is a command line interface for the `benchmark_script()` API, which internally calls `benchmark_model()`. You can read more about this code organization [here](https://github.com/groq/mlagility/blob/main/docs/code.md). 
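As a rough sketch of the script-level entry point, the same kind of run can be launched directly from Python rather than through the CLI. The exact parameter name used to pass the scripts is an assumption here, so treat this as illustrative only:

```python
# Hypothetical sketch: calling the higher-level benchmark_script() API
# directly. The parameter name for the list of scripts is an assumption;
# device mirrors the CLI's --device argument.
from mlagility import benchmark_script

benchmark_script(
    input_scripts=["scripts/hello_world.py"],  # assumed parameter name
    device="x86",
)
```

The lower-level `benchmark_model()` entry point can also be called directly, as shown next.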
For example, the following script: ```python -from mlagility import benchmark_workload +from mlagility import benchmark_model model = YourModel() results = model(**inputs) -perf = benchmark_workload(model, inputs) +perf = benchmark_model(model, inputs) ``` Will print an output like this: @@ -88,7 +88,7 @@ Will print an output like this: > throughput: 21784.8 ips ``` -`benchmark_workload()` returns a `MeasuredPerformance` object that includes members: +`benchmark_model()` returns a `MeasuredPerformance` object that includes members: - `latency_units`: unit of time used for measuring latency, which is set to `milliseconds (ms)`. - `mean_latency`: average benchmarking latency, measured in `latency_units`. - `throughput_units`: unit used for measuring throughput, which is set to `inferences per second (IPS)`. @@ -106,40 +106,34 @@ A **model** is a PyTorch (torch.nn.Module) instance that has been instantiated i - Examples: BERT-Base, ResNet-50, etc. -## Workload - -A **workload** is a model executed with inputs of a given shape. - -- Example: BERT-Base executed with 'attention_mask' and 'input_ids' of shape (1, 128). - ## Device -A **device** is a piece of hardware capable of running a workload. +A **device** is a piece of hardware capable of running a model. - Examples: Nvidia A100 40GB, Intel Xeon Platinum 8380, Groq GroqChip1 ## Runtime -A **runtime** is a piece of software that executes a workload on a device. +A **runtime** is a piece of software that executes a model on a device. - Different runtimes can produce different performance results on the same device because: - - Runtimes often optimize the workload prior to execution. + - Runtimes often optimize the model prior to execution. - The runtime is responsible for orchestrating data movement, device invocation, etc. - Examples: ONNX Runtime, TensorRT, PyTorch Eager Execution, etc. ## Analysis -**Analysis** is the process by which `benchmark_script()` inspects a Python script and identifies the PyTorch workloads within. +**Analysis** is the process by which `benchmark_script()` inspects a Python script and identifies the PyTorch models within. -`benchmark_script()` performs analysis by running and profiling your script. When a workload (see [Workload](#workload) is encountered, it is inspected to gather statistics (such as the number of parameters in the model) and/or pass it to the `benchmark_workload()` API for benchmarking. +`benchmark_script()` performs analysis by running and profiling your script. When a model object (see [Model](#model) is encountered, it is inspected to gather statistics (such as the number of parameters in the model) and/or pass it to the `benchmark_model()` API for benchmarking. > _Note_: the `benchit` CLI and `benchmark_script()` API both run your entire script. Please ensure that your script is safe to run, especially if you got it from the internet. -> See the [Multiple Workloads per Script tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md#multiple-workloads-per-script) for a detailed example of how analysis can discover multiple models from a single script. +> See the [Multiple Models per Script tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md#multiple-models-per-script) for a detailed example of how analysis can discover multiple models from a single script. -## Workload Hashes +## Model Hashes -Each `workload` in a `script` is identified by a unique `hash`. The `analysis` phase of `benchmark_script()` will display the `hash` for each worload. 
The `build` phase will save exported workloads to into the `cache` according to the naming scheme `{script_name}_{hash}`. +Each `model` in a `script` is identified by a unique `hash`. The `analysis` phase of `benchmark_script()` will display the `hash` for each model. The `build` phase will save exported models to into the `cache` according to the naming scheme `{script_name}_{hash}`. For example: @@ -169,40 +163,40 @@ Once a script has been benchmarked, all labels that correspond to that script wi ## Build -**Build** is the process by which the `benchmark_workload()` API consumes a [model](#model) and a corresponding set of `inputs` and produces ONNX files, Groq executables, and other artifacts needed for benchmarking. +**Build** is the process by which the `benchmark_model()` API consumes a [model](#model) and produces ONNX files, Groq executables, and other artifacts needed for benchmarking. We refer to this collection of artifacts as the `build directory` and store each build in the MLAgility `cache` for later use. We leverage ONNX files because of their broad compatibility with model frameworks (PyTorch, Keras, etc.), software (ONNX Runtime, TensorRT, Groq Compiler, etc.), and devices (CPUs, GPUs, GroqChip processors, etc.). You can learn more about ONNX [here](https://onnx.ai/). -The build functionality of `benchmark_workload()` includes the following steps: -1. Take a `model` object and a corresponding set of `inputs`* (which we call a [workload](#workload)). +The build functionality of `benchmark_model()` includes the following steps: +1. Take a `model` object and a corresponding set of `inputs`*. 1. Check the cache for a successful build we can load. If we get a cache hit, the build is done. If no build is found, or the build in the cache is stale**, continue. 1. Pass the `model` and `inputs` to the ONNX exporter corresponding to the `model`'s framework (e.g., PyTorch models use `torch.onnx.export()`). -1. Use [ONNX Runtime](https://github.com/microsoft/onnxruntime) and [ONNX ML tools](https://github.com/onnx/onnxmltools) to optimize the workload and convert it to float16, respectively. +1. Use [ONNX Runtime](https://github.com/microsoft/onnxruntime) and [ONNX ML tools](https://github.com/onnx/onnxmltools) to optimize the model and convert it to float16, respectively. 1. [If the build's device type is `groq`] Pass the optimized float16 ONNX file to Groq Compiler and Assembler to produce a Groq executable. 1. Save the successful build to the cache for later use. -> *_Note_: Each `build` corresponds to a set of static input shapes. `inputs` are passed into the `benchmark_workload()` API to provide those shapes. +> *_Note_: Each `build` corresponds to a set of static input shapes. `inputs` are passed into the `benchmark_model()` API to provide those shapes. > **_Note_: A cached build can be stale because of any of the following changes since the last build: > * The model changed > * The shape of the inputs changed -> * The arguments to `benchmark_workload()` changed +> * The arguments to `benchmark_model()` changed > * MLAgility was updated to a new, incompatible version ## Benchmark -*Benchmark* is the process by which the `benchmark_workload()` API collects performance statistics about a [workload](#workload). Specifically, `benchmark_workload()` takes a [build](#build) of a workload and executes it on a target device using target runtime software (see [Devices and Runtimes](#devices-and-runtimes)). 
+*Benchmark* is the process by which the `benchmark_model()` API collects performance statistics about a [model](#model). Specifically, `benchmark_model()` takes a [build](#build) of a model and executes it on a target device using target runtime software (see [Devices and Runtimes](#devices-and-runtimes)). -By default, `benchmark_workload()` will run the workload 100 times to collect the following statistics: +By default, `benchmark_model()` will run the model 100 times to collect the following statistics: 1. Mean Latency, in milliseconds (ms): the average time it takes the runtime/device combination to execute the model/inputs combination once. This includes the time spent invoking the device and transferring the model's inputs and outputs between host memory and the device (when applicable). 1. Throughput, in inferences per second (IPS): the number of times the model/inputs combination can be executed on the runtime/device combination per second. - > - _Note_: `benchmark_workload()` is not aware of whether `inputs` is a single input or a batch of inputs. If your `inputs` is actually a batch of inputs, you should multiply `benchmark_workload()`'s reported IPS by the batch size. + > - _Note_: `benchmark_model()` is not aware of whether `inputs` is a single input or a batch of inputs. If your `inputs` is actually a batch of inputs, you should multiply `benchmark_model()`'s reported IPS by the batch size. # Devices and Runtimes -MLAgility can be used to benchmark a workload across a variety of runtimes and devices, as long as the device is available and the device/runtime combination is supported by MLAgility. +MLAgility can be used to benchmark a model across a variety of runtimes and devices, as long as the device is available and the device/runtime combination is supported by MLAgility. ## Available Devices @@ -214,7 +208,7 @@ If you are using a remote machine, it must: - include the target device - have `miniconda`, `python>=3.8`, and `docker>=20.10` installed -When you call `benchit` CLI or `benchmark_workload()`, the following actions are performed on your behalf: +When you call `benchit` CLI or `benchmark_model()`, the following actions are performed on your behalf: 1. Perform a `build`, which exports all models from the script to ONNX and prepares for benchmarking. - If the device type selected is `groq`, this step also compiles the ONNX file into a Groq executable. 1. [Remote mode only] `ssh` into the remote machine and transfer the `build`. @@ -232,7 +226,7 @@ Specify a device type that will be used for benchmarking. Usage: - `benchit benchmark INPUT_FILES --device TYPE` - - Benchmark the workload(s) in `INPUT_FILES` on a locally installed device of type `TYPE` (eg, a locally installed Nvidia device). + - Benchmark the model(s) in `INPUT_FILES` on a locally installed device of type `TYPE` (eg, a locally installed Nvidia device). Valid values of `TYPE` include: - `x86` (default): Intel and AMD x86 CPUs. @@ -245,7 +239,7 @@ Valid values of `TYPE` include: Also available as API arguments: - `benchmark_script(device=...)` -- `benchmark_workload(device=...)`. +- `benchmark_model(device=...)`. > For a detailed example, see the [CLI Nvidia tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md#nvidia-benchmarking). 
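As a minimal sketch of the API form of device selection (the model, inputs, and `build_name` below are placeholders chosen for illustration):

```python
# Minimal sketch: selecting the benchmarking device through the API.
# The model, inputs, and build_name are placeholders for illustration.
import torch

from mlagility import benchmark_model

model = torch.nn.Linear(10, 5)
inputs = {"input": torch.rand(1, 10)}

perf = benchmark_model(
    model,
    inputs,
    build_name="device_example",
    device="nvidia",  # same meaning as `benchit ... --device nvidia`
)
print(perf.mean_latency, perf.throughput)
```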
@@ -269,7 +263,7 @@ Valid values: Also available as API arguments: - `benchmark_script(backend=...)` -- `benchmark_workload(backend=...)` +- `benchmark_model(backend=...)` ### Runtimes @@ -290,7 +284,7 @@ Each device type has its own default runtime, as indicated below. This feature is also be available as an API argument: - `benchmark_script(runtimes=[...])` -- `benchmark_workload(runtime=...)` +- `benchmark_model(runtime=...)` > _Note_: `torch-eager` and `torch-compiled` are not available whe using the `remote` backend. @@ -303,7 +297,7 @@ The default usage of `benchit` is to directly provide it with a python script, f _Note_: Some of these tasks have to do with the MLAgility `cache`, which stores the `build directories` (see [Build](#build)). The commands are: -- [`benchmark`](#benchmark-command) (default command): benchmark the workload(s) in one or more scripts +- [`benchmark`](#benchmark-command) (default command): benchmark the model(s) in one or more scripts - [`cache list`](#list-command): list the available builds in the cache - [`cache print`](#print-command): print the [state](https://github.com/groq/groqflow/blob/main/docs/user_guide.md#stateyaml-file) of a build from the cache - [`cache delete`](#delete-command): delete one or more builds from the cache @@ -334,27 +328,27 @@ Examples: - `benchit *.py` - Benchmark all scripts which can be found at the current working directory. - `benchit models/*/*.py` - - Benchmark the entire corpora of MLAgility. + - Benchmark the entire corpora of MLAgility models. - `benchit *.onnx` - Benchmark all ONNX files which can be found at the current working directory. > See the [Benchmark Multiple Scripts tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md#benchmark-multiple-scripts) for a detailed example. -You can also leverage workload hashes (see [Workload Hashes](#workload-hashes)) to filter which workloads in a script will be acted on, in the following manner: - - `benchit example.py::hash_0` will only benchmark the workload corresponding to `hash_0`. - - You can also supply multiple hashes, for example `benchit example.py::hash_0,hash_1` will benchmark the workloads corresponding to both `hash_0` and `hash_1`. +You can also leverage model hashes (see [Model Hashes](#model-hashes)) to filter which models in a script will be acted on, in the following manner: + - `benchit example.py::hash_0` will only benchmark the model corresponding to `hash_0`. + - You can also supply multiple hashes, for example `benchit example.py::hash_0,hash_1` will benchmark the models corresponding to both `hash_0` and `hash_1`. -> _Note_: Using bash regular expressions and filtering workload by hashes are mutually exclusive. To filter workloads by hashes, provide the full path of the Python script rather than a regular expression. +> _Note_: Using bash regular expressions and filtering model by hashes are mutually exclusive. To filter models by hashes, provide the full path of the Python script rather than a regular expression. -> See the [Filtering Workload Hashes tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md#filtering-workload-hashes) for a detailed example. +> See the [Filtering Model Hashes tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md#filtering-model-hashes) for a detailed example. 
-Additionally, you can leverage labels (see [Labels](#labels)) to filter which workloads in a script will be acted on, in the following manner: +Additionally, you can leverage labels (see [Labels](#labels)) to filter which models in a script will be acted on, in the following manner: - `benchit *.py --labels test_group::a` will only benchmark the scripts labels with `test_group::a`. - You can also supply multiple labels, for example `benchit *.py --labels test_group::a domain::nlp` only benchmark scripts that have both `test_group::a`, and `domain::nlp` labels. -> _Note_: Using bash regular expressions and filtering workload by hashes are mutually exclusive. To filter workloads by hashes, provide the full path of the Python script rather than a regular expression. +> _Note_: Using bash regular expressions and filtering model by hashes are mutually exclusive. To filter models by hashes, provide the full path of the Python script rather than a regular expression. -> _Note_: ONNX file input currently supports only workloads of size less than 2 GB. ONNX files passed directly into `benchit *.onnx` are benchmarked as-is without applying any additional build stages. +> _Note_: ONNX file input currently supports only models of size less than 2 GB. ONNX files passed directly into `benchit *.onnx` are benchmarked as-is without applying any additional build stages. ### Use Slurm @@ -377,7 +371,7 @@ Available as an API argument: ### Process Isolation -Evaluate each `benchit` input in its own isolated subprocess. This option allows the main process to continue on to the next input if the current input fails for any reason (e.g., a bug in the input script, the operating system running out of memory, incompatibility between a workload and the selected benchmarking runtime, etc.). +Evaluate each `benchit` input in its own isolated subprocess. This option allows the main process to continue on to the next input if the current input fails for any reason (e.g., a bug in the input script, the operating system running out of memory, incompatibility between a model and the selected benchmarking runtime, etc.). Usage: - `benchit benchmark INPUT_FILES --process-isolation` @@ -397,7 +391,7 @@ Also available as an API argument: Also available as API arguments: - `benchmark_script(cache_dir=...)` -- `benchmark_workload(cache_dir=...)` +- `benchmark_model(cache_dir=...)` > See the [Cache Directory tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md#cache-directory) for a detailed example. @@ -407,9 +401,9 @@ Also available as API arguments: Also available as API arguments: - `benchmark_script(lean_cache=True/False, ...)` (default False) -- `benchmark_workload(lean_cache=True/False, ...)` (default False) +- `benchmark_model(lean_cache=True/False, ...)` (default False) -> _Note_: useful for benchmarking many workloads, since the `build` artifacts from the workloads can take up a significant amount of hard drive space. +> _Note_: useful for benchmarking many models, since the `build` artifacts from the models can take up a significant amount of hard drive space. > See the [Lean Cache tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md#lean-cache) for a detailed example. @@ -418,19 +412,19 @@ Also available as API arguments: `--rebuild REBUILD` Sets a cache policy that decides whether to load or rebuild a cached build. 
Takes one of the following values: - - *Default*: `"if_needed"` will use a cached workload if available, build one if it is not available,and rebuild any stale builds. - - Set `"always"` to force `benchit` to always rebuild your workload, regardless of whether it is available in the cache or not. - - Set `"never"` to make sure `benchit` never rebuilds your workload, even if it is stale. `benchit`will attempt to load any previously built workload in the cache, however there is no guarantee it will be functional or correct. + - *Default*: `"if_needed"` will use a cached model if available, build one if it is not available,and rebuild any stale builds. + - Set `"always"` to force `benchit` to always rebuild your model, regardless of whether it is available in the cache or not. + - Set `"never"` to make sure `benchit` never rebuilds your model, even if it is stale. `benchit`will attempt to load any previously built model in the cache, however there is no guarantee it will be functional or correct. Also available as API arguments: - `benchmark_script(rebuild=...)` -- `benchmark_workload(rebuild=...)` +- `benchmark_model(rebuild=...)` > See the [GroqFlow rebuild examples](https://github.com/groq/groqflow/tree/main/examples) to learn more. ### Sequence File -Replaces the default build sequence in `benchmark_workload()` with a custom build sequence, defined in a Python script. +Replaces the default build sequence in `benchmark_model()` with a custom build sequence, defined in a Python script. Usage: - `benchit benchmark INPUT_FILES --sequence-file FILE` @@ -439,9 +433,9 @@ This script must define a function, `get_sequence()`, that returns an instance o Also available as API arguments: - `benchmark_script(sequence=...)` -- `benchmark_workload(sequence=...)` +- `benchmark_model(sequence=...)` -> _Note_: the `sequence` argument to `benchmark_script()` can be either a sequence file or a `Sequence` instance. The `sequence` argument to `benchmark_workload()` must be a `Sequence` instance. +> _Note_: the `sequence` argument to `benchmark_script()` can be either a sequence file or a `Sequence` instance. The `sequence` argument to `benchmark_model()` must be a `Sequence` instance. > See the [Sequence File tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/build.md#sequence-file) for a detailed example. @@ -460,7 +454,7 @@ Also available as an API argument: ### Maximum Analysis Depth -Depth of sub-workloads to inspect within the script. Default value is 0, indicating to only analyze workloads at the top level of the script. Depth of 1 would indicate to analyze the first level of sub-workloads within the top-level workloads. +Depth of sub-models to inspect within the script. Default value is 0, indicating to only analyze models at the top level of the script. Depth of 1 would indicate to analyze the first level of sub-models within the top-level models. Usage: - `benchit benchmark INPUT_FILES --max-depth DEPTH` @@ -481,17 +475,17 @@ Usage: Also available as API arguments: - `benchmark_script(onnx_opset=...)` -- `benchmark_workloads(onnx_opset=...)` +- `benchmark_models(onnx_opset=...)` > _Note_: ONNX opset can also be set by an environment variable. The --onnx-opset argument takes precedence over the environment variable. See [MLAGILITY_ONNX_OPSET](#set-the-onnx-opset). ### Analyze Only -Instruct `benchit` or `benchmark_workload()` to only run the [Analysis](#analysis) phase of the `benchmark` command. 
+Instruct `benchit` or `benchmark_model()` to only run the [Analysis](#analysis) phase of the `benchmark` command. Usage: - `benchit benchmark INPUT_FILES --analyze-only` - - This discovers workloads within the input script and prints information about them, but does not perform any build or benchmarking. + - This discovers models within the input script and prints information about them, but does not perform any build or benchmarking. > _Note_: any build- or benchmark-specific options will be ignored, such as `--backend`, `--device`, `--groqview`, etc. @@ -502,33 +496,33 @@ Also available as an API argument: ### Build Only -Instruct `benchit`, `benchmark_script()`, or `benchmark_workload()` to only run the [Analysis](#analysis) and [Build](#build) phases of the `benchmark` command. +Instruct `benchit`, `benchmark_script()`, or `benchmark_model()` to only run the [Analysis](#analysis) and [Build](#build) phases of the `benchmark` command. Usage: - `benchit benchmark INPUT_FILES --build-only` - - This builds the workloads within the input script, however does not run any benchmark. + - This builds the models within the input script, however does not run any benchmark. > _Note_: any benchmark-specific options will be ignored, such as `--backend`. Also available as API arguments: - `benchmark_script(build_only=True/False)` (default False) -- `benchmark_workload(build_only=True/False)` (default False) +- `benchmark_model(build_only=True/False)` (default False) > See the [Build Only tutorial](https://github.com/groq/mlagility/blob/main/examples/cli/build.md#build-only) for a detailed example. ### Export Only -Instruct `benchit`, `benchmark_script()`, or `benchmark_workload()` to only run the [Analysis](#analysis) and [Build](#build) phases of the `benchmark` command, and to stop the Build phase after exporting the ONNX file. Similar to [Build Only](#build-only), except that no optimization Stages will be applied to the ONNX file. +Instruct `benchit`, `benchmark_script()`, or `benchmark_model()` to only run the [Analysis](#analysis) and [Build](#build) phases of the `benchmark` command, and to stop the Build phase after exporting the ONNX file. Similar to [Build Only](#build-only), except that no optimization Stages will be applied to the ONNX file. Usage: - `benchit benchmark INPUT_FILES --export-only` - - This exports ONNX files for the workloads within the input script, however does not optimize those ONNX files nor run any benchmark. + - This exports ONNX files for the models within the input script, however does not optimize those ONNX files nor run any benchmark. > _Note_: any benchmark-specific options will be ignored, such as `--backend`. Also available as API arguments: - `benchmark_script(export_only=True/False)` (default False) -- `benchmark_workload(export_only=True/False)` (default False) +- `benchmark_model(export_only=True/False)` (default False) ### Resume @@ -538,7 +532,7 @@ For example: - `benchit benchmark INPUT_FILES` will benchmark everything in `INPUT_FILES`, regardless of whether benchmarking those scripts has been attempted previously. - `benchit benchmark INPUT_FILES --resume` will benchmark everything `INPUT_FILES` that has not been previously attempted. -The `--resume` behavior is useful for when you are benchmarking a large corpus of workloads, and one of the workloads crashes your run. 
If you repeat the same command, but with the `--resume` argument, then the new run will pick up where the last run left off, including skipping over any input scripts that crashed previously. +The `--resume` behavior is useful for when you are benchmarking a large corpus of models, and one of the models crashes your run. If you repeat the same command, but with the `--resume` argument, then the new run will pick up where the last run left off, including skipping over any input scripts that crashed previously. > _Note_: if `--resume` is skipping over any input scripts that you *do* want to evaluate, you have two options: > - Manually build the input script with `benchit benchmark INPUT_SCRIPT` without setting `--resume` @@ -551,13 +545,13 @@ Also available as an API argument: The following options are specific to Groq builds and benchmarks, and are passed into the [GroqFlow build tool](https://github.com/groq/groqflow). Learn more about them in the [GroqFlow user guide](https://github.com/groq/groqflow/blob/main/docs/user_guide.md). - `--groq-compiler-flags COMPILER_FLAGS [COMPILER_FLAGS ...]` Sets the groqit(compiler_flags=...) arg within the GroqFlow build tool (default behavior is to use groqit()'s default compiler flags) - - Also available as API arguments: `benchmark_script(groq_compiler_flags=...)`, `benchmark_workload(groq_compiler_flags=...)`. + - Also available as API arguments: `benchmark_script(groq_compiler_flags=...)`, `benchmark_model(groq_compiler_flags=...)`. - `--groq-assembler-flags ASSEMBLER_FLAGS [ASSEMBLER_FLAGS ...]` Sets the groqit(assembler_flags=...) arg within the GroqFlow build tool (default behavior is to use groqit()'s default assembler flags) - - Also available as API arguments: `benchmark_script(groq_assembler_flags=...)`, `benchmark_workload(groq_assembler_flags=...)`. + - Also available as API arguments: `benchmark_script(groq_assembler_flags=...)`, `benchmark_model(groq_assembler_flags=...)`. - `--groq-num-chips NUM_CHIPS` Sets the groqit(num_chips=...) arg (default behavior is to let groqit() automatically select the number of chips) - - Also available as API arguments: `benchmark_script(groq_num_chips=...)`, `benchmark_workload(groq_num_chips=...)`. + - Also available as API arguments: `benchmark_script(groq_num_chips=...)`, `benchmark_model(groq_num_chips=...)`. - `--groqview` Enables GroqView for the build(s) - - Also available as API arguments: `benchmark_script(groqview=True/False,)`, `benchmark_workload(groqview=True/False,)`. + - Also available as API arguments: `benchmark_script(groqview=True/False,)`, `benchmark_model(groqview=True/False,)`. ## Cache Commands @@ -658,7 +652,7 @@ export MLAGILITY_CACHE_DIR=~/a_different_cache_dir ### Show Traceback -By default, `benchit` and `benchmark_script()` will display the traceback for any exceptions caught during workload build. However, you may sometimes want a cleaner output on your terminal. To accomplish this, set the `MLAGILITY_TRACEBACK` environment variable to `False`, which will catch any exceptions during workload build and benchmark and display a simple error message like `Status: Unknown benchit error: {e}`. +By default, `benchit` and `benchmark_script()` will display the traceback for any exceptions caught during model build. However, you may sometimes want a cleaner output on your terminal. 
To accomplish this, set the `MLAGILITY_TRACEBACK` environment variable to `False`, which will catch any exceptions during model build and benchmark and display a simple error message like `Status: Unknown benchit error: {e}`. For example: @@ -668,7 +662,7 @@ export MLAGILITY_TRACEBACK=False ### Preserve Terminal Outputs -By default, `benchit` and `benchmark_script()` will erase the contents of the terminal in order to present a clean status update for each script and workload evaluated. +By default, `benchit` and `benchmark_script()` will erase the contents of the terminal in order to present a clean status update for each script and model evaluated. However, you may want to see everything that is being printed to the terminal. You can accomplish this by setting the `MLAGILITY_DEBUG` environment variable to `True`. For example: @@ -678,7 +672,7 @@ export MLAGILITY_DEBUG=True ### Set the ONNX Opset -By default, `benchit`, `benchmark_script()`, and `benchmark_workload()` will use the default ONNX opset defined in `onnxflow.common.build.DEFAULT_ONNX_OPSET`. You can set a different default ONNX opset by setting the `MLAGILITY_ONNX_OPSET` environment variable. +By default, `benchit`, `benchmark_script()`, and `benchmark_model()` will use the default ONNX opset defined in `onnxflow.common.build.DEFAULT_ONNX_OPSET`. You can set a different default ONNX opset by setting the `MLAGILITY_ONNX_OPSET` environment variable. For example: diff --git a/examples/cli/build.md b/examples/cli/build.md index bcd07060..c52b8181 100644 --- a/examples/cli/build.md +++ b/examples/cli/build.md @@ -6,7 +6,7 @@ This chapter of the `benchit` CLI tutorial focuses on techniques to customize th The tutorial chapters are: 1. [Getting Started](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md) -1. [Guiding Workload Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the workload discovery process to help streamline your workflow. +1. [Guiding Model Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the model discovery process to help streamline your workflow. 1. [Working with the Cache](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md): `benchit` arguments and commands that help you understand, inspect, and manipulate the `mlagility cache`. 1. Customizing Builds (this document): `benchit` arguments that customize build behavior to unlock new workflows. @@ -16,7 +16,7 @@ All of the tutorials assume that your current working directory is in the same l ## Build Only -`benchit` provides the `--build-only` argument for when you want to analyze and build the workloads in a script, without actually benchmarking them. +`benchit` provides the `--build-only` argument for when you want to analyze and build the models in a script, without actually benchmarking them. You can try it out with this command: @@ -43,7 +43,7 @@ pytorch_outputs: tensor([-0.1675, 0.1548, -0.1627, 0.0067, 0.3353], grad_fn=< Woohoo! The 'benchmark' command is complete. ``` -You can see that the workload is discovered and built, but no benchmark took place. +You can see that the model is discovered and built, but no benchmark took place. > See the [Build Only documentation](https://github.com/groq/mlagility/blob/main/docs/tools_user_guide.md#build-only) for more details. 
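The same behavior is reachable from the API. Here is a minimal sketch using the documented `build_only` argument; the model, inputs, and `build_name` are placeholders for illustration:

```python
# Minimal sketch: build without benchmarking via the API, mirroring the
# CLI's --build-only flag. Model, inputs, and build_name are placeholders.
import torch

from mlagility import benchmark_model

model = torch.nn.Linear(10, 5)
inputs = {"input": torch.rand(1, 10)}

benchmark_model(
    model,
    inputs,
    build_name="build_only_example",
    build_only=True,  # analyze and build, but skip the benchmark
)
```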
@@ -51,7 +51,7 @@ You can see that the workload is discovered and built, but no benchmark took pla You can customize the behavior of the [Build](https://github.com/groq/mlagility/blob/main/docs/tools_user_guide.md#build) stage of `benchit` by creating a custom `Sequence`. -A `Sequence` tells the `benchmark_workload()` API within `benchit` how to `build` a model to prepare it for benchmarking. +A `Sequence` tells the `benchmark_model()` API within `benchit` how to `build` a model to prepare it for benchmarking. The default `Sequence` for CPU and GPU benchmarking performs the following build steps: 1. Export the model to an ONNX file @@ -97,6 +97,6 @@ If we then repeat the `benchit cache stats hello_world_479b1332` we will see tha Now that you have completed this tutorial, make sure to check out the other tutorials if you want to learn more: 1. [Getting Started](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md) -1. [Guiding Workload Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the workload discovery process to help streamline your workflow. +1. [Guiding Model Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the model discovery process to help streamline your workflow. 1. [Working with the Cache](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md): `benchit` arguments and commands that help you understand, inspect, and manipulate the `mlagility cache`. 1. Customizing Builds (this document): `benchit` arguments that customize build behavior to unlock new workflows. \ No newline at end of file diff --git a/examples/cli/cache.md b/examples/cli/cache.md index 30393230..e1fc841e 100644 --- a/examples/cli/cache.md +++ b/examples/cli/cache.md @@ -10,7 +10,7 @@ This chapter of the `benchit` CLI tutorials is focused on understanding, inspect The tutorial chapters are: 1. [Getting Started](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md) -1. [Guiding Workload Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the workload discovery process to help streamline your workflow. +1. [Guiding Model Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the model discovery process to help streamline your workflow. 1. Working with the Cache (this document): `benchit` arguments and commands that help you understand, inspect, and manipulate the `mlagility cache`. 1. [Customizing Builds](https://github.com/groq/mlagility/blob/main/examples/cli/build.md): `benchit` arguments that customize build behavior to unlock new workflows. @@ -191,6 +191,6 @@ total 20K Now that you have completed this tutorial, make sure to check out the other tutorials if you want to learn more: 1. [Getting Started](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md) -1. [Guiding Workload Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the workload discovery process to help streamline your workflow. +1. [Guiding Model Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the model discovery process to help streamline your workflow. 1. 
Working with the Cache (this document): `benchit` arguments and commands that help you understand, inspect, and manipulate the `mlagility cache`. 1. [Customizing Builds](https://github.com/groq/mlagility/blob/main/examples/cli/build.md): `benchit` arguments that customize build behavior to unlock new workflows. \ No newline at end of file diff --git a/examples/cli/discovery.md b/examples/cli/discovery.md index 2e9ba495..8efe2e56 100644 --- a/examples/cli/discovery.md +++ b/examples/cli/discovery.md @@ -1,24 +1,24 @@ -# Guiding Workload Discovery +# Guiding Model Discovery -This chapter of the `benchit` CLI tutorial is focused on how to guide the tool as it discovers workloads. You will learn things such as: -- [How to run workload discovery, without spending time on builds or benchmarking](#analyze-only) -- [How to benchmark all the workloads in all the scripts in a directory](#benchmark-multiple-scripts) -- [How to analyze the building blocks of a workload](#maximum-analysis-depth) -- [How to filter which workloads are passed to the build and benchmark operations](#filtering-workload-hashes) +This chapter of the `benchit` CLI tutorial is focused on how to guide the tool as it discovers models. You will learn things such as: +- [How to run model discovery, without spending time on builds or benchmarking](#analyze-only) +- [How to benchmark all the models in all the scripts in a directory](#benchmark-multiple-scripts) +- [How to analyze the building blocks of a model](#maximum-analysis-depth) +- [How to filter which models are passed to the build and benchmark operations](#filtering-model-hashes) The tutorial chapters are: 1. [Getting Started](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md) -1. Guiding Workload Discovery (this document): `benchit` arguments that customize the workload discovery process to help streamline your workflow. +1. Guiding Model Discovery (this document): `benchit` arguments that customize the model discovery process to help streamline your workflow. 1. [Working with the Cache](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md): `benchit` arguments and commands that help you understand, inspect, and manipulate the `mlagility cache`. 1. [Customizing Builds](https://github.com/groq/mlagility/blob/main/examples/cli/build.md): `benchit` arguments that customize build behavior to unlock new workflows. -# Workload Discovery Tutorials +# Model Discovery Tutorials All of the tutorials assume that your current working directory is in the same location as this readme file (`examples/cli`). ## Analyze Only -`benchit` provides the `--analyze-only` argument for when you want to analyze the workloads in a script, without actually building or benchmarking them. +`benchit` provides the `--analyze-only` argument for when you want to analyze the models in a script, without actually building or benchmarking them. You can try it out with this command: @@ -29,7 +29,7 @@ benchit benchmark scripts/hello_world.py --analyze-only Which gives a result like: ``` -Workloads discovered during profiling: +Models discovered during profiling: hello_world.py: pytorch_model (executed 1x - 0.00s) @@ -44,13 +44,13 @@ pytorch_outputs: tensor([-0.1675, 0.1548, -0.1627, 0.0067, 0.3353], grad_fn=< Woohoo! The 'benchmark' command is complete. ``` -You can see that the workload is discovered, and some stats are printed, but no build or benchmark took place. +You can see that the model is discovered, and some stats are printed, but no build or benchmark took place. 
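The `(executed 1x - 0.00s)` statistics above can be gathered without building or benchmarking anything because analysis only instruments the script while it runs. The sketch below shows the general idea behind that instrumentation: temporarily wrapping a module's `forward` so each call is counted and timed. It is a simplified stand-in for the tracer in `mlagility.analysis.analysis`, not the actual implementation.

```
import time
import torch


class TinyModel(torch.nn.Module):
    """Small stand-in model used only for this illustration."""

    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(11, 5)

    def forward(self, x):
        return self.fc(x)


def instrument(module: torch.nn.Module, stats: dict) -> None:
    """Wrap module.forward so every invocation is counted and timed."""
    original_forward = module.forward

    def spy(*args, **kwargs):
        start = time.time()
        outputs = original_forward(*args, **kwargs)
        stats["executed"] = stats.get("executed", 0) + 1
        stats["exec_time"] = stats.get("exec_time", 0.0) + time.time() - start
        return outputs

    module.forward = spy


model = TinyModel()
stats = {}
instrument(model, stats)
model(torch.rand(1, 11))
print(f"pytorch_model (executed {stats['executed']}x - {stats['exec_time']:.2f}s)")
```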
> See the [Analyze Only documentation](https://github.com/groq/mlagility/blob/main/docs/tools_user_guide.md#analyze-only) for more details. ## Benchmark Multiple Scripts -If you want to benchmark an entire corpus of models, but you don't want to call `benchit` individually on each python file you may provide more than one python file to benchit at a time. +If you want to benchmark an entire corpus of models, but you don't want to call `benchit` individually on each model you may provide more than one python file to benchit at a time. For example, the command: @@ -64,11 +64,11 @@ or the command benchit scripts/*.py ``` -Will iterate over every workload in every script in the `scripts` directory, producing a result like this: +Will iterate over every model in every script in the `scripts` directory, producing a result like this: ``` -Workloads discovered during profiling: +Models discovered during profiling: hello_world.py: pytorch_model (executed 1x) @@ -77,7 +77,7 @@ hello_world.py: Location: /home/jfowers/mlagility/examples/cli/scripts/hello_world.py, line 29 Parameters: 55 (<0.1 MB) Hash: 479b1332 - Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 657792.2 inferences per second (IPS) @@ -88,7 +88,7 @@ two_models.py: Location: /home/jfowers/mlagility/examples/cli/scripts/two_models.py, line 40 Parameters: 510 (<0.1 MB) Hash: 215ca1e3 - Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 509528.6 inferences per second (IPS) @@ -99,7 +99,7 @@ max_depth.py: Location: /home/jfowers/mlagility/examples/cli/scripts/max_depth.py, line 41 Parameters: 85 (<0.1 MB) Hash: 80b93950 - Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 693955.3 inferences per second (IPS) @@ -135,7 +135,7 @@ benchit benchmark scripts/max_depth.py --max-depth 1 You get a result like: ``` -Workloads discovered during profiling: +Models discovered during profiling: max_depth.py: pytorch_model (executed 1x) @@ -144,7 +144,7 @@ max_depth.py: Location: /home/jfowers/mlagility/examples/cli/scripts/max_depth.py, line 41 Parameters: 85 (<0.1 MB) Hash: 80b93950 - Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 533884.4 inferences per second (IPS) @@ -154,7 +154,7 @@ max_depth.py: Class: Linear () Parameters: 55 (<0.1 MB) Hash: 6d5eb4ee - Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 809701.4 inferences per second (IPS) @@ -163,7 +163,7 @@ max_depth.py: Class: Linear () Parameters: 30 (<0.1 MB) Hash: d4b2ffa7 - Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 677945.2 inferences per second (IPS) ``` @@ -174,22 +174,22 @@ You can see that the two instances of `torch.nn.Linear`, `fc` and `fc2`, are ben -## Filtering Workload Hashes +## Filtering Model Hashes -When you ran the example from the 
[Multiple Workloads per Script](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md#multiple-workloads-per-script) tutorial, you saw that `benchit` discovered, built, and benchmarked two workloads. What if you only wanted to build and benchmark one of the workloads? +When you ran the example from the [Multiple Models per Script](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md#multiple-models-per-script) tutorial, you saw that `benchit` discovered, built, and benchmarked two models. What if you only wanted to build and benchmark one of the models? -You can leverage the workload hashes feature of `benchit` to filter which workloads are acted on. You can see in the result from [Multiple Workloads per Script](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md#multiple-workloads-per-script) that the two workloads, `pytorch_model` and `another_pytorch_model`, have hashes `f93db89f` and `c69b7dea`, respectively. +You can leverage the model hashes feature of `benchit` to filter which models are acted on. You can see in the result from [Multiple Models per Script](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md#multiple-models-per-script) that the two models, `pytorch_model` and `another_pytorch_model`, have hashes `479b1332` and `215ca1e3`, respectively. -If you wanted to only build and benchmark `another_pytorch_model`, you could use this command, which filters `two_models.py` with the hash `c69b7dea`: +If you wanted to only build and benchmark `another_pytorch_model`, you could use this command, which filters `two_models.py` with the hash `215ca1e3`: ``` -benchit benchmark scripts/two_models.py::c69b7dea +benchit benchmark scripts/two_models.py::215ca1e3 ``` That would produce a result like: ``` -Workloads discovered during profiling: +Models discovered during profiling: two_models.py: pytorch_model (executed 1x) @@ -197,15 +197,15 @@ two_models.py: Class: SmallModel () Location: /home/jfowers/mlagility/examples/cli/scripts/two_models.py, line 32 Parameters: 55 (<0.1 MB) - Hash: f93db89f + Hash: 479b1332 another_pytorch_model (executed 1x) Model Type: Pytorch (torch.nn.Module) Class: SmallModel () Location: /home/jfowers/mlagility/examples/cli/scripts/two_models.py, line 40 Parameters: 510 (<0.1 MB) - Hash: c69b7dea - Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Hash: 215ca1e3 + Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 499272.2 inferences per second (IPS) @@ -216,15 +216,15 @@ more_pytorch_outputs: tensor([-0.1198, -0.5344, -0.1920, -0.1565, 0.2279, 0.69 Woohoo! The 'benchmark' command is complete. ``` -You can see that both workloads are discovered, but only `another_pytorch_model` was built and benchmarked. +You can see that both models are discovered, but only `another_pytorch_model` was built and benchmarked. > See the [Input Script documentation](https://github.com/groq/mlagility/blob/main/docs/tools_user_guide.md#input-script) for more details. -## Filtering Script Labels +## Filtering Model Labels -You can also leverage the labels feature of `benchit` to filter which scripts are acted on. Labels are pragmas added by the user to the first line of a `.py` file to list some of the attributes of that given script. `hello_world.py`, for example has the label `test_group::a`, while `two_models.py` and `max_depth.py` have the label `test_group::b`. 
+You can also leverage the labels feature of `benchit` to filter which models are acted on. Labels are pragmas added by the user to the first line of a `.py` file to list some of the attributes of that given script. `hello_world.py`, for example has the label `test_group::a`, while `two_models.py` and `max_depth.py` have the label `test_group::b`. -If you wanted to only build and benchmark scripts that have the label `test_group::a`, you could use the command: +If you wanted to only build and benchmark models that have the label `test_group::a`, you could use the command: ``` benchit scripts/*.py --labels test_group::a @@ -233,7 +233,7 @@ benchit scripts/*.py --labels test_group::a That would produce a result like: ``` -Workloads discovered during profiling: +Models discovered during profiling: hello_world.py: pytorch_model (executed 1x) @@ -242,7 +242,7 @@ hello_world.py: Location: /net/home/dhnoronha/mlagility/examples/cli/scripts/hello_world.py, line 30 Parameters: 55 (<0.1 MB) Hash: 479b1332 - Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 490444.1 inferences per second (IPS) @@ -255,6 +255,6 @@ Woohoo! The 'benchmark' command is complete. Now that you have completed this tutorial, make sure to check out the other tutorials if you want to learn more: 1. [Getting Started](https://github.com/groq/mlagility/blob/main/examples/cli/readme.md) -1. Guiding Workload Discovery (this document): `benchit` arguments that customize the model discovery process to help streamline your workflow. +1. Guiding Model Discovery (this document): `benchit` arguments that customize the model discovery process to help streamline your workflow. 1. [Working with the Cache](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md): `benchit` arguments and commands that help you understand, inspect, and manipulate the `mlagility cache`. 1. [Customizing Builds](https://github.com/groq/mlagility/blob/main/examples/cli/build.md): `benchit` arguments that customize build behavior to unlock new workflows. diff --git a/examples/cli/extras/example_sequence.py b/examples/cli/extras/example_sequence.py index 946da4ea..0af4fe28 100644 --- a/examples/cli/extras/example_sequence.py +++ b/examples/cli/extras/example_sequence.py @@ -1,7 +1,7 @@ """ This script is an example of a sequence.py file. Such a sequence.py file can be used to redefine the build phase of the benchit CLI, benchmark_script(), -and benchmark_workload() to have any custom behavior. +and benchmark_model() to have any custom behavior. In this example sequence.py file we are setting the build sequence to simply export from pytorch to ONNX. This differs from the default build sequence, which diff --git a/examples/cli/readme.md b/examples/cli/readme.md index 69400043..6b833ea3 100644 --- a/examples/cli/readme.md +++ b/examples/cli/readme.md @@ -8,7 +8,7 @@ Once you've familiarized yourself with these features, head over to the [`models The tutorials are organized into a few chapters: 1. Getting Started (this document) -1. [Guiding Workload Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the model discovery process to help streamline your workflow. +1. [Guiding Model Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the model discovery process to help streamline your workflow. 
1. [Working with the Cache](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md): `benchit` arguments and commands that help you understand, inspect, and manipulate the `mlagility cache`. 1. [Customizing Builds](https://github.com/groq/mlagility/blob/main/examples/cli/build.md): `benchit` arguments that customize build behavior to unlock new workflows. @@ -16,7 +16,7 @@ In this tutorial you will learn things such as: - [How to benchmark BERT with one command](#just-benchmark-bert) - [A "hello world" example, which is the easiest way to get started](#hello-world) - [Benchmarking on Nvidia GPUs](#nvidia-benchmarking) -- [Working with scripts that invoke more than one workload](#multiple-workloads-per-script) +- [Working with scripts that invoke more than one model](#multiple-models-per-script) - [Benchmarking an ONNX file](#onnx-benchmarking) # Just Benchmark BERT @@ -33,7 +33,7 @@ benchit $models/transformers/bert.py This will produce a result that looks like this, which shows you the performance of BERT-Base on your CPU: ``` -Workloads discovered during profiling: +Models discovered during profiling: bert.py: model (executed 1x) @@ -66,14 +66,14 @@ That commands `benchit` benchmark `hello_world.py` on your CPU. Specifically, `b 1. Pass `scripts/hello_world.py` as the input_script to the `benchmark` command of `benchit`. - _Note_: `benchit <.py file>` is a shorthand for `benchit benchmark <.py file>`. 1. Run `hello_world.py` against a profiler and look for models from supported machine learning frameworks (e.g. Pytorch). -2. Discover the `pytorch_model` instance of class `SmallModel` and print some statistics about it. -3. Export `pytorch_model` to an ONNX file, optimize that ONNX file, and convert it to the `float16` data type. -4. Measure the performance of the ONNX file on your x86 CPU and report the `mean latency` and `throughput`. +1. Discover the `pytorch_model` instance of class `SmallModel`, which is a PyTorch model, and print some statistics about it. +1. Export `pytorch_model` to an ONNX file, optimize that ONNX file, and convert it to the `float16` data type. +1. Measure the performance of the ONNX file on your x86 CPU and report the `mean latency` and `throughput`. The result looks like this: ``` -Workloads discovered during profiling: +Models discovered during profiling: hello_world.py: pytorch_model (executed 1x) @@ -82,7 +82,7 @@ hello_world.py: Location: /home/jfowers/mlagility/examples/cli/hello_world.py, line 29 Parameters: 55 (<0.1 MB) Hash: 479b1332 - Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.001 milliseconds (ms) Throughput: 185964.8 inferences per second (IPS) @@ -106,7 +106,7 @@ benchit scripts/hello_world.py --device nvidia To get a result like this: ``` -Workloads discovered during profiling: +Models discovered during profiling: hello_world.py: pytorch_model (executed 1x) @@ -115,7 +115,7 @@ hello_world.py: Location: /home/jfowers/mlagility/examples/cli/hello_world.py, line 29 Parameters: 55 (<0.1 MB) Hash: 479b1332 - Status: Successfully benchmarked on NVIDIA A100-SXM4-40GB + Status: Model successfully benchmarked on NVIDIA A100-SXM4-40GB Mean Latency: 0.027 milliseconds (ms) Throughput: 21920.5 inferences per second (IPS) @@ -126,9 +126,9 @@ Woohoo! The 'benchmark' command is complete. You can see that the device mentioned in the status is a `NVIDIA A100-SXM4-40GB`. 
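The same hello-world flow can also be driven from Python using the `benchmark_model()` API that this patch series renames (see `examples/model_api/hello_world.py` later in the diff). The sketch below is a trimmed illustration of that usage: only the arguments that appear elsewhere in this patch (`build_name`, `device`) are passed, so treat everything else as defaults rather than a complete signature, and the tiny `SmallModel` defined here is a stand-in for the tutorial's model.

```
import torch
from mlagility import benchmark_model


class SmallModel(torch.nn.Module):
    """Tiny linear model, mirroring the tutorial's hello_world example."""

    def __init__(self, input_size: int, output_size: int):
        super().__init__()
        self.fc = torch.nn.Linear(input_size, output_size)

    def forward(self, x):
        return self.fc(x)


pytorch_model = SmallModel(10, 5)
inputs = {"x": torch.rand(10)}

# Build (ONNX export, optimization, float16 conversion) and benchmark on the
# local CPU; passing device="nvidia" targets a GPU, like `--device nvidia`.
perf = benchmark_model(
    pytorch_model,
    inputs,
    build_name="hello_api_world",
    device="x86",
)
print(perf.mean_latency, perf.throughput)
```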
-## Multiple Workloads per Script +## Multiple Models per Script -The MLAgility tools will benchmark all workloads discovered in the input script. We can demonstrate this with the `two_models.py` script. +The MLAgility tools will benchmark all models discovered in the input script. We can demonstrate this with the `two_models.py` script. Run the following command: @@ -139,7 +139,7 @@ benchit scripts/two_models.py To get a result like: ``` -Workloads discovered during profiling: +Models discovered during profiling: two_models.py: pytorch_model (executed 1x) @@ -148,7 +148,7 @@ two_models.py: Location: /home/jfowers/mlagility/examples/cli/scripts/two_models.py, line 32 Parameters: 55 (<0.1 MB) Hash: 479b1332 - Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 640717.1 inferences per second (IPS) @@ -158,7 +158,7 @@ two_models.py: Location: /home/jfowers/mlagility/examples/cli/scripts/two_models.py, line 40 Parameters: 510 (<0.1 MB) Hash: 215ca1e3 - Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz + Status: Model successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz Mean Latency: 0.000 milliseconds (ms) Throughput: 642021.1 inferences per second (IPS) @@ -169,7 +169,7 @@ more_pytorch_outputs: tensor([-0.1198, -0.5344, -0.1920, -0.1565, 0.2279, 0.69 Woohoo! The 'benchmark' command is complete. ``` -You can see that both workloads in `two_models.py`, `pytorch_model` and `another_pytorch_model`, are discovered and benchmarked. +You can see that both model instances in `two_models.py`, `pytorch_model` and `another_pytorch_model`, are both discovered and benchmarked. ## ONNX Benchmarking @@ -200,6 +200,6 @@ Info: Performance of build sample on x86 device Intel(R) Xeon(R) CPU @ 2.20GHz i # Thanks! Now that you have completed this tutorial, make sure to check out the other tutorials if you want to learn more: -1. [Guiding Workload Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the model discovery process to help streamline your workflow. +1. [Guiding Model Discovery](https://github.com/groq/mlagility/blob/main/examples/cli/discovery.md): `benchit` arguments that customize the model discovery process to help streamline your workflow. 1. [Working with the Cache](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md): `benchit` arguments and commands that help you understand, inspect, and manipulate the `mlagility cache`. 1. [Customizing Builds](https://github.com/groq/mlagility/blob/main/examples/cli/cache.md): `benchit` arguments that customize build behavior to unlock new workflows. 
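Under the hood, ONNX files follow the same path as the Python models: `benchmark_files()` hands each `.onnx` file to `benchmark_model()`, as the `script_api.py` changes later in this patch series show. A minimal sketch of that call is below; the file path is illustrative, and only the arguments visible in this diff (`model`, `inputs=None`, `build_name`) plus a hedged `device` choice are used.

```
from mlagility import benchmark_model

# Illustrative path only; the CLI tutorial ships a sample file at
# examples/cli/onnx/sample.onnx.
onnx_file = "onnx/sample.onnx"

# An ONNX model is passed by path and carries its own input signature,
# so no inputs dictionary is provided.
benchmark_model(
    model=onnx_file,
    inputs=None,
    build_name="sample",
    device="x86",
)
```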
\ No newline at end of file diff --git a/examples/model_api/hello_world.py b/examples/model_api/hello_world.py index bbd0be14..4553d3f2 100644 --- a/examples/model_api/hello_world.py +++ b/examples/model_api/hello_world.py @@ -1,6 +1,6 @@ import argparse import torch -from mlagility import benchmark_workload +from mlagility import benchmark_model torch.manual_seed(0) @@ -56,7 +56,7 @@ def main(): # Benchmark the model on the specified device and backend print(f"Benchmarking on {args.device} {args.backend}...") - benchmark_workload( + benchmark_model( pytorch_model, inputs, build_name="hello_api_world", diff --git a/examples/readme.md b/examples/readme.md index 0d05e36d..e9e8aebb 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -2,6 +2,6 @@ This directory contains examples to help you learn how to use the MLAgility tools. The examples are split up into two sub-directories: 1. `examples/cli`: a tutorial series for the `benchit` CLI. This is the recommended starting point. -1. `examples/model_api`: scripts that demonstrate how to use the `mlagility.benchmark_workload()` API. +1. `examples/model_api`: scripts that demonstrate how to use the `mlagility.benchmark_model()` API. 1. `examples/script_api`: scripts that demonstrate how to use the `mlagility.benchmark_script()` API. 1. `examples/onnxflow`: scripts that demonstrate how to use the `onnxflow.buildit()` API. diff --git a/models/llm_layer/llama_layer_prototype.py b/models/llm_layer/llama_layer_prototype.py index c61c5219..6fc42795 100644 --- a/models/llm_layer/llama_layer_prototype.py +++ b/models/llm_layer/llama_layer_prototype.py @@ -12,6 +12,10 @@ def call_llama_layer(params="7B", use_cache=False): + # Use different torch seeds for KV caching vs. not, so that + # the models end up with different mlagility hashes + # Remove the if-statement when + # https://github.com/groq/mlagility/issues/316 is fixed torch.manual_seed(0) # Parsing command-line arguments diff --git a/src/mlagility/__init__.py b/src/mlagility/__init__.py index 8c5443f7..f92ad481 100644 --- a/src/mlagility/__init__.py +++ b/src/mlagility/__init__.py @@ -1,5 +1,5 @@ from mlagility.version import __version__ from .api.script_api import benchmark_script -from .api.model_api import benchmark_workload +from .api.model_api import benchmark_model from .cli.cli import main as benchitcli diff --git a/src/mlagility/analysis/analysis.py b/src/mlagility/analysis/analysis.py index 05c39406..125520f3 100644 --- a/src/mlagility/analysis/analysis.py +++ b/src/mlagility/analysis/analysis.py @@ -21,7 +21,7 @@ import mlagility.analysis.util as util import mlagility.analysis.tf_helpers as tf_helpers import mlagility.common.labels as labels -from mlagility.api.model_api import benchmark_workload +from mlagility.api.model_api import benchmark_model import mlagility.common.filesystem as filesystem @@ -135,7 +135,7 @@ def explore_workload( ) workload_info.status_message_color = printing.Colors.WARNING else: - perf = benchmark_workload( + perf = benchmark_model( model_info.model, inputs, device=tracer_args.device, @@ -154,11 +154,11 @@ def explore_workload( onnx_opset=tracer_args.onnx_opset, ) if Action.BENCHMARK in tracer_args.actions: - workload_info.status_message = "Successfully benchmarked!" + workload_info.status_message = "Model successfully benchmarked!" workload_info.performance = perf workload_info.status_message_color = printing.Colors.OKGREEN else: - workload_info.status_message = "Successfully built!" + workload_info.status_message = "Model successfully built!" 
workload_info.status_message_color = printing.Colors.OKGREEN except exp.StageError: @@ -473,7 +473,7 @@ def forward_spy(*args, **kwargs): model_info.workloads[workload_hash] = util.WorkloadInfo( hash=workload_hash, is_target=workload_hash in tracer_args.targets - or len(tracer_args.targets) == 0, + or tracer_args.targets == [], input_shapes=input_shapes, parent_hash=parent_workload_hash, ) diff --git a/src/mlagility/analysis/status.py b/src/mlagility/analysis/status.py index 14397650..265d9a10 100644 --- a/src/mlagility/analysis/status.py +++ b/src/mlagility/analysis/status.py @@ -13,7 +13,7 @@ def update(models_found: Dict[str, ModelInfo]) -> None: os.system("clear") printing.logn( - "\nWorkloads discovered during profiling:\n", + "\nModels discovered during profiling:\n", c=printing.Colors.BOLD, ) recursive_print(models_found, None, None, []) diff --git a/src/mlagility/api/model_api.py b/src/mlagility/api/model_api.py index d8e43d73..dc664019 100644 --- a/src/mlagility/api/model_api.py +++ b/src/mlagility/api/model_api.py @@ -22,7 +22,7 @@ MLAGILITY_DEFAULT_REBUILD_POLICY = "if_needed" -def benchmark_workload( +def benchmark_model( model: Any, inputs: Dict[str, Any], build_name: str, diff --git a/src/mlagility/api/script_api.py b/src/mlagility/api/script_api.py index 502601ac..50076a0d 100644 --- a/src/mlagility/api/script_api.py +++ b/src/mlagility/api/script_api.py @@ -11,7 +11,7 @@ import mlagility.cli.spawn as spawn import mlagility.common.filesystem as filesystem import mlagility.common.labels as labels_library -from mlagility.api.model_api import benchmark_workload +from mlagility.api.model_api import benchmark_model from mlagility.api.devices import SUPPORTED_DEVICES, DEFAULT_RUNTIME from mlagility.analysis.analysis import ( evaluate_script, @@ -312,7 +312,7 @@ def benchmark_files( """ Inspect the input_files and sort them into .py and .onnx files. - Pass .py files into benchmark_script() and .onnx files into benchmark_workload(). + Pass .py files into benchmark_script() and .onnx files into benchmark_model(). 
""" python_scripts = [] @@ -352,7 +352,7 @@ def benchmark_files( groqview=groqview, ) - # Iterate and pass each ONNX file into benchmark_workload() one at a time + # Iterate and pass each ONNX file into benchmark_model() one at a time for onnx_file in onnx_files: build_name = filesystem.clean_script_name(onnx_file) @@ -370,7 +370,7 @@ def benchmark_files( ) for runtime in runtimes: - benchmark_workload( + benchmark_model( model=onnx_file, inputs=None, build_name=build_name, diff --git a/test/model_api.py b/test/model_api.py index 72a27ad2..3eb6014d 100644 --- a/test/model_api.py +++ b/test/model_api.py @@ -8,7 +8,7 @@ import onnxflow.common.cache as cache import onnxflow.justbuildit.export as export import onnxflow.common.build as build -from mlagility import benchmark_workload +from mlagility import benchmark_model class SmallPytorchModel(torch.nn.Module): @@ -62,7 +62,7 @@ def setUp(self) -> None: def test_001_build_pytorch_model(self): build_name = "build_pytorch_model" - benchmark_workload( + benchmark_model( pytorch_model, inputs, build_name=build_name, @@ -104,7 +104,7 @@ def fire(self, state): ], ) - benchmark_workload( + benchmark_model( pytorch_model, inputs, build_name=build_name, @@ -119,7 +119,7 @@ def fire(self, state): def test_003_local_benchmark(self): build_name = "local_benchmark" - perf = benchmark_workload( + perf = benchmark_model( pytorch_model, inputs, device="x86", @@ -147,7 +147,7 @@ def test_004_onnx_opset(self): user_opset = 15 assert user_opset != build.DEFAULT_ONNX_OPSET - perf = benchmark_workload( + perf = benchmark_model( pytorch_model, inputs, device="x86", From 5d85530dc31c53309f8a9f84b6be749b65056175 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Wed, 21 Jun 2023 14:02:30 -0700 Subject: [PATCH 30/35] Suggested changes --- models/llm_layer/llama_layer_prototype.py | 4 ---- src/mlagility/analysis/analysis.py | 2 +- src/mlagility/analysis/status.py | 2 +- src/mlagility/version.py | 2 +- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/models/llm_layer/llama_layer_prototype.py b/models/llm_layer/llama_layer_prototype.py index 6fc42795..c61c5219 100644 --- a/models/llm_layer/llama_layer_prototype.py +++ b/models/llm_layer/llama_layer_prototype.py @@ -12,10 +12,6 @@ def call_llama_layer(params="7B", use_cache=False): - # Use different torch seeds for KV caching vs. 
not, so that - # the models end up with different mlagility hashes - # Remove the if-statement when - # https://github.com/groq/mlagility/issues/316 is fixed torch.manual_seed(0) # Parsing command-line arguments diff --git a/src/mlagility/analysis/analysis.py b/src/mlagility/analysis/analysis.py index 125520f3..a7d913f8 100644 --- a/src/mlagility/analysis/analysis.py +++ b/src/mlagility/analysis/analysis.py @@ -473,7 +473,7 @@ def forward_spy(*args, **kwargs): model_info.workloads[workload_hash] = util.WorkloadInfo( hash=workload_hash, is_target=workload_hash in tracer_args.targets - or tracer_args.targets == [], + or len(tracer_args.targets) == 0, input_shapes=input_shapes, parent_hash=parent_workload_hash, ) diff --git a/src/mlagility/analysis/status.py b/src/mlagility/analysis/status.py index 265d9a10..4cd480fc 100644 --- a/src/mlagility/analysis/status.py +++ b/src/mlagility/analysis/status.py @@ -121,7 +121,7 @@ def print_workload( if model_info.depth == 0 and len(model_info.workloads) > 1: printing.logn( - f"\n{ident}\tWorkload {workload_idx+1} (executed {workload.executed}x{exec_time})", + f"\n{ident}\twith input shape {workload_idx+1} (executed {workload.executed}x{exec_time})", c=printing.Colors.OKGREEN, ) diff --git a/src/mlagility/version.py b/src/mlagility/version.py index 1fe90f6a..0aff436e 100644 --- a/src/mlagility/version.py +++ b/src/mlagility/version.py @@ -1 +1 @@ -__version__ = "3.1.4" +__version__ = "3.1.5" From c67f14073db142e677a4e376166d2f1dc30a1715 Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Wed, 21 Jun 2023 14:23:59 -0700 Subject: [PATCH 31/35] Replacing the term workloads by invocations --- src/mlagility/analysis/analysis.py | 104 +++++++++++++++-------------- src/mlagility/analysis/status.py | 73 ++++++++++---------- src/mlagility/analysis/util.py | 15 +++-- 3 files changed, 103 insertions(+), 89 deletions(-) diff --git a/src/mlagility/analysis/analysis.py b/src/mlagility/analysis/analysis.py index a7d913f8..4f273e0e 100644 --- a/src/mlagility/analysis/analysis.py +++ b/src/mlagility/analysis/analysis.py @@ -64,22 +64,22 @@ def torch_activations(self) -> List[str]: return act -def _store_traceback(workload_info: util.WorkloadInfo): +def _store_traceback(invocation_info: util.UniqueInvocationInfo): """ - Store the traceback from an exception into workload_info so that + Store the traceback from an exception into invocation_info so that we can print it during the status update. """ exc_type, exc_value, exc_traceback = sys.exc_info() - workload_info.traceback = traceback.format_exception( + invocation_info.traceback = traceback.format_exception( exc_type, exc_value, exc_traceback ) -def explore_workload( +def explore_invocation( model_inputs: dict, model_info: util.ModelInfo, - workload_info: util.WorkloadInfo, + invocation_info: util.UniqueInvocationInfo, tracer_args: TracerArgs, ) -> None: """ @@ -87,8 +87,8 @@ def explore_workload( """ # Update status to "computing" - workload_info.status_message = "Computing..." - workload_info.status_message_color = printing.Colors.OKBLUE + invocation_info.status_message = "Computing..." 
+ invocation_info.status_message_color = printing.Colors.OKBLUE status.update(tracer_args.models_found) # Get a copy of the keyword arguments @@ -115,10 +115,10 @@ def explore_workload( inputs[all_args[i]] = torch.tensor(args[i].detach().numpy()) else: inputs[all_args[i]] = args[i] - workload_info.inputs = inputs + invocation_info.inputs = inputs build_name = filesystem.get_build_name( - tracer_args.script_name, tracer_args.labels, workload_info.hash + tracer_args.script_name, tracer_args.labels, invocation_info.hash ) # Save model labels @@ -128,12 +128,12 @@ def explore_workload( perf = None try: if model_info.model_type == build.ModelType.PYTORCH_COMPILED: - workload_info.status_message = ( + invocation_info.status_message = ( "Skipping model compiled using torch.compile(). " "benchit requires models to be in eager mode " "(regardless of what runtime you have selected)." ) - workload_info.status_message_color = printing.Colors.WARNING + invocation_info.status_message_color = printing.Colors.WARNING else: perf = benchmark_model( model_info.model, @@ -154,36 +154,36 @@ def explore_workload( onnx_opset=tracer_args.onnx_opset, ) if Action.BENCHMARK in tracer_args.actions: - workload_info.status_message = "Model successfully benchmarked!" - workload_info.performance = perf - workload_info.status_message_color = printing.Colors.OKGREEN + invocation_info.status_message = "Model successfully benchmarked!" + invocation_info.performance = perf + invocation_info.status_message_color = printing.Colors.OKGREEN else: - workload_info.status_message = "Model successfully built!" - workload_info.status_message_color = printing.Colors.OKGREEN + invocation_info.status_message = "Model successfully built!" + invocation_info.status_message_color = printing.Colors.OKGREEN except exp.StageError: build_state = build.load_state( cache_dir=tracer_args.cache_dir, build_name=build_name ) - workload_info.status_message = "Build Error: see log files for details." - workload_info.status_message_color = printing.Colors.WARNING + invocation_info.status_message = "Build Error: see log files for details." + invocation_info.status_message_color = printing.Colors.WARNING - _store_traceback(workload_info) + _store_traceback(invocation_info) except exp.Error: - workload_info.status_message = "GroqFlowError: see log files for details." - workload_info.status_message_color = printing.Colors.WARNING + invocation_info.status_message = "GroqFlowError: see log files for details." + invocation_info.status_message_color = printing.Colors.WARNING - _store_traceback(workload_info) + _store_traceback(invocation_info) # This broad exception is ok since enumerating all exceptions is # not possible, as the tested software continuously evolves. 
except Exception as e: # pylint: disable=broad-except util.stop_stdout_forward() - workload_info.status_message = f"Unknown benchit error: {e}" - workload_info.status_message_color = printing.Colors.WARNING + invocation_info.status_message = f"Unknown benchit error: {e}" + invocation_info.status_message_color = printing.Colors.WARNING - _store_traceback(workload_info) + _store_traceback(invocation_info) finally: # Ensure that stdout is not being forwarded before updating status if hasattr(sys.stdout, "terminal"): @@ -254,12 +254,12 @@ def get_model_hash( return build.hash_model(model, model_type, hash_params=False)[:8] -def get_workload_hash( - model_hash: str, parent_workload_hash: str, args: Tuple, kwargs: Dict +def get_invocation_hash( + model_hash: str, parent_invocation_hash: str, args: Tuple, kwargs: Dict ) -> str: """ - Combines the model hash and the input shapes to create the workload hash - We also ensure that workloads that come from different workload parents have different hashes + Combines the model hash and the input shapes to create the invocation hash + We also ensure that invocations that come from different parents have different hashes """ # Merge positional and keyword args @@ -269,7 +269,9 @@ def get_workload_hash( # Get input shapes and types input_shapes, input_dtypes = build.get_shapes_and_dtypes(kwargs) - hashable_content = f"{model_hash}{parent_workload_hash}{input_shapes}{input_dtypes}" + hashable_content = ( + f"{model_hash}{parent_invocation_hash}{input_shapes}{input_dtypes}" + ) return hashlib.sha256(hashable_content.encode()).hexdigest()[:8], input_shapes @@ -456,49 +458,53 @@ def forward_spy(*args, **kwargs): parent_hash, ) - # Get parent workload hash - parent_workload_hash = None + # Get parent invocation hash + parent_invocation_hash = None if parent_hash: - parent_workload_hash = tracer_args.models_found[ + parent_invocation_hash = tracer_args.models_found[ parent_hash - ].last_workload_executed + ].last_unique_invocation_executed model_hash = get_model_hash(local_var, model_type) - workload_hash, input_shapes = get_workload_hash( - model_hash, parent_workload_hash, args, kwargs + invocation_hash, input_shapes = get_invocation_hash( + model_hash, parent_invocation_hash, args, kwargs ) model_info = tracer_args.models_found[model_hash] - if workload_hash not in model_info.workloads: - model_info.workloads[workload_hash] = util.WorkloadInfo( - hash=workload_hash, - is_target=workload_hash in tracer_args.targets + if invocation_hash not in model_info.unique_invocations: + model_info.unique_invocations[ + invocation_hash + ] = util.UniqueInvocationInfo( + hash=invocation_hash, + is_target=invocation_hash in tracer_args.targets or len(tracer_args.targets) == 0, input_shapes=input_shapes, - parent_hash=parent_workload_hash, + parent_hash=parent_invocation_hash, ) - model_info.last_workload_executed = workload_hash + model_info.last_unique_invocation_executed = invocation_hash # Keep track of execution time start_time = time.time() outputs = old_forward(*args, **kwargs) end_time = time.time() - workload_info = model_info.workloads[workload_hash] - workload_info.exec_time = workload_info.exec_time + end_time - start_time - workload_info.executed = workload_info.executed + 1 + invocation_info = model_info.unique_invocations[invocation_hash] + invocation_info.exec_time = ( + invocation_info.exec_time + end_time - start_time + ) + invocation_info.executed = invocation_info.executed + 1 # Call groqit if this is the first time the model is being executed # and this 
model has been selected by the user if ( - workload_info.executed == 1 - and workload_info.is_target + invocation_info.executed == 1 + and invocation_info.is_target and (model_info.build_model) ): - explore_workload( + explore_invocation( model_inputs=[args, kwargs], model_info=model_info, - workload_info=workload_info, + invocation_info=invocation_info, tracer_args=tracer_args, ) # Ensure that groqit() doesn't interfere with our execution count diff --git a/src/mlagility/analysis/status.py b/src/mlagility/analysis/status.py index 4cd480fc..297625fc 100644 --- a/src/mlagility/analysis/status.py +++ b/src/mlagility/analysis/status.py @@ -22,7 +22,7 @@ def update(models_found: Dict[str, ModelInfo]) -> None: def recursive_print( models_found: Dict[str, ModelInfo], parent_model_hash: Union[str, None] = None, - parent_workload_hash: Union[str, None] = None, + parent_invocation_hash: Union[str, None] = None, script_names_visited: List[str] = False, ) -> None: script_names_visited = [] @@ -30,16 +30,16 @@ def recursive_print( for model_hash in models_found.keys(): model_visited = False model_info = models_found[model_hash] - workload_idx = 0 - for workload_hash in model_info.workloads.keys(): - workload = model_info.workloads[workload_hash] + invocation_idx = 0 + for invocation_hash in model_info.unique_invocations.keys(): + unique_invocation = model_info.unique_invocations[invocation_hash] if ( parent_model_hash == model_info.parent_hash - and workload.executed > 0 + and unique_invocation.executed > 0 and ( - model_info.workloads[workload_hash].parent_hash - == parent_workload_hash + model_info.unique_invocations[invocation_hash].parent_hash + == parent_invocation_hash ) ): print_file_name = False @@ -48,15 +48,15 @@ def recursive_print( if model_info.depth == 0: print_file_name = True - print_workload( + print_invocation( model_info, - workload_hash, + invocation_hash, print_file_name, - workload_idx=workload_idx, + invocation_idx=invocation_idx, model_visited=model_visited, ) model_visited = True - workload_idx += 1 + invocation_idx += 1 if print_file_name: script_names_visited.append(model_info.script_name) @@ -64,38 +64,38 @@ def recursive_print( recursive_print( models_found, parent_model_hash=model_hash, - parent_workload_hash=workload_hash, + parent_invocation_hash=invocation_hash, script_names_visited=script_names_visited, ) -def print_workload( +def print_invocation( model_info: ModelInfo, - workload_hash: Union[str, None], + invocation_hash: Union[str, None], print_file_name: bool = False, - workload_idx: int = 0, + invocation_idx: int = 0, model_visited: bool = False, ) -> None: """ Print information about a given model or submodel """ - workload = model_info.workloads[workload_hash] + unique_invocation = model_info.unique_invocations[invocation_hash] ident = "\t" * (2 * model_info.depth + 1) if print_file_name: print(f"{model_info.script_name}.py:") - if workload.exec_time == 0 or model_info.build_model: + if unique_invocation.exec_time == 0 or model_info.build_model: exec_time = "" else: - exec_time = f" - {workload.exec_time:.2f}s" + exec_time = f" - {unique_invocation.exec_time:.2f}s" - if model_info.depth == 0 and len(model_info.workloads) > 1: + if model_info.depth == 0 and len(model_info.unique_invocations) > 1: if not model_visited: printing.logn(f"{ident}{model_info.name}") else: printing.log(f"{ident}{model_info.name}") printing.logn( - f" (executed {workload.executed}x{exec_time})", + f" (executed {unique_invocation.executed}x{exec_time})", c=printing.Colors.OKGREEN, ) @@ 
-119,44 +119,47 @@ def print_workload( f"{ident}\tParameters:\t{'{:,}'.format(model_info.params)} ({model_size} MB)" ) - if model_info.depth == 0 and len(model_info.workloads) > 1: + if model_info.depth == 0 and len(model_info.unique_invocations) > 1: printing.logn( - f"\n{ident}\twith input shape {workload_idx+1} (executed {workload.executed}x{exec_time})", + f"\n{ident}\twith input shape {invocation_idx+1} (executed {unique_invocation.executed}x{exec_time})", c=printing.Colors.OKGREEN, ) # Prepare input shape to be printed - input_shape = dict(model_info.workloads[workload_hash].input_shapes) + input_shape = dict(model_info.unique_invocations[invocation_hash].input_shapes) input_shape = {key: value for key, value in input_shape.items() if value != ()} input_shape = str(input_shape).replace("{", "").replace("}", "") print(f"{ident}\tInput Shape:\t{input_shape}") - print(f"{ident}\tHash:\t\t" + workload_hash) + print(f"{ident}\tHash:\t\t" + invocation_hash) # Print benchit results if benchit was run - if workload.performance: + if unique_invocation.performance: printing.log(f"{ident}\tStatus:\t\t") printing.logn( - f"Successfully benchmarked on {workload.performance.device} ({workload.performance.runtime} v{workload.performance.runtime_version})", - c=workload.status_message_color, + f"Successfully benchmarked on {unique_invocation.performance.device} ({unique_invocation.performance.runtime} v{unique_invocation.performance.runtime_version})", + c=unique_invocation.status_message_color, ) printing.logn( - f"{ident}\t\t\tMean Latency:\t{workload.performance.mean_latency:.3f}" - f"\t{workload.performance.latency_units}" + f"{ident}\t\t\tMean Latency:\t{unique_invocation.performance.mean_latency:.3f}" + f"\t{unique_invocation.performance.latency_units}" ) printing.logn( - f"{ident}\t\t\tThroughput:\t{workload.performance.throughput:.1f}" - f"\t{workload.performance.throughput_units}" + f"{ident}\t\t\tThroughput:\t{unique_invocation.performance.throughput:.1f}" + f"\t{unique_invocation.performance.throughput_units}" ) print() else: - if workload.is_target and model_info.build_model: + if unique_invocation.is_target and model_info.build_model: printing.log(f"{ident}\tStatus:\t\t") - printing.logn(f"{workload.status_message}", c=workload.status_message_color) + printing.logn( + f"{unique_invocation.status_message}", + c=unique_invocation.status_message_color, + ) - if workload.traceback is not None: + if unique_invocation.traceback is not None: if os.environ.get("MLAGILITY_TRACEBACK") != "False": - for line in workload.traceback: + for line in unique_invocation.traceback: for subline in line.split("\n")[:-1]: print(f"{ident}\t{subline}") diff --git a/src/mlagility/analysis/util.py b/src/mlagility/analysis/util.py index 3ac4903c..0c2b4845 100644 --- a/src/mlagility/analysis/util.py +++ b/src/mlagility/analysis/util.py @@ -17,7 +17,12 @@ class AnalysisException(Exception): @dataclass -class WorkloadInfo: +class UniqueInvocationInfo: + """ + Refers to unique static model invocations + (i.e. 
models executed with unique input shapes) + """ + hash: Union[str, None] = None parent_hash: Union[str, None] = None performance: MeasuredPerformance = None @@ -44,10 +49,10 @@ class ModelInfo: hash: Union[str, None] = None parent_hash: Union[str, None] = None old_forward: Union[Callable, None] = None - workloads: Union[Dict[str, WorkloadInfo], None] = dataclasses.field( - default_factory=dict - ) - last_workload_executed: Union[str, None] = None + unique_invocations: Union[ + Dict[str, UniqueInvocationInfo], None + ] = dataclasses.field(default_factory=dict) + last_unique_invocation_executed: Union[str, None] = None build_model: bool = False model_type: build.ModelType = build.ModelType.PYTORCH From 336d56388515afcf889c220b85531fbcf5c572cc Mon Sep 17 00:00:00 2001 From: Daniel Holanda Noronha Date: Wed, 21 Jun 2023 15:03:57 -0700 Subject: [PATCH 32/35] Add documentation for new feature --- examples/cli/discovery.md | 2 +- examples/cli/readme.md | 39 ++++++++++++++++ examples/cli/scripts/multiple_invocations.py | 47 ++++++++++++++++++++ src/mlagility/analysis/status.py | 2 +- 4 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 examples/cli/scripts/multiple_invocations.py diff --git a/examples/cli/discovery.md b/examples/cli/discovery.md index 8efe2e56..f4a382f3 100644 --- a/examples/cli/discovery.md +++ b/examples/cli/discovery.md @@ -112,7 +112,7 @@ You can see that `hello_world.py`, `two_models.py`, and `max_depth.py` are all e > See the [Benchmark Multiple Scripts documentation](https://github.com/groq/mlagility/blob/main/docs/tools_user_guide.md#benchmark-multiple-scripts) for more details. -### Maximum Analysis Depth +## Maximum Analysis Depth PyTorch models (eg, `torch.nn.Module`) are often built out of a collection of smaller instances. For example, a PyTorch multilayer perceptron (MLP) model may be built out of many `torch.nn.Linear` modules. diff --git a/examples/cli/readme.md b/examples/cli/readme.md index 6b833ea3..83f1ea9d 100644 --- a/examples/cli/readme.md +++ b/examples/cli/readme.md @@ -17,6 +17,7 @@ In this tutorial you will learn things such as: - [A "hello world" example, which is the easiest way to get started](#hello-world) - [Benchmarking on Nvidia GPUs](#nvidia-benchmarking) - [Working with scripts that invoke more than one model](#multiple-models-per-script) +- [Working with scripts that a model multiple times](#multiple-invocations-of-a-model) - [Benchmarking an ONNX file](#onnx-benchmarking) # Just Benchmark BERT @@ -171,6 +172,44 @@ Woohoo! The 'benchmark' command is complete. You can see that both model instances in `two_models.py`, `pytorch_model` and `another_pytorch_model`, are both discovered and benchmarked. +## Multiple Invocations of a Model + +The same model may be invoked using different input shapes (e.g. when varying the batch size). Only one invocation is processed my MLAgility if the same model is invoked multiple times using inputs of the same shape. However, multiple invocation are processed my MLAgility if the same model is invoked multiple times using inputs of different shapes. + +The `multiple_invocations.py` script instantiates a single script and invokes it three times. The fist two times the model is invoked with inputs of the same shape (batch 1), while the third invocation uses a different input shape (batch 2). Note that two unique static model invocations are identified. 
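To make it concrete how those two invocations end up with different hashes, the sketch below reproduces the essence of `get_invocation_hash()` from this patch series: the short model hash is combined with the parent invocation hash and the input shapes/dtypes, and the SHA-256 digest is truncated to eight characters. It is a standalone illustration with made-up values, not the exact MLAgility code path.

```
import hashlib


def invocation_hash(model_hash, parent_invocation_hash, input_shapes, input_dtypes):
    # Combine the model identity, the parent invocation, and the input
    # signature so calls with different shapes get different hashes.
    hashable_content = (
        f"{model_hash}{parent_invocation_hash}{input_shapes}{input_dtypes}"
    )
    return hashlib.sha256(hashable_content.encode()).hexdigest()[:8]


model_hash = "deadbeef"  # made-up 8-character model hash for illustration

batch1 = invocation_hash(model_hash, None, {"x": (1, 11)}, {"x": "float32"})
batch2 = invocation_hash(model_hash, None, {"x": (2, 11)}, {"x": "float32"})

print(batch1, batch2)  # two different 8-character hashes
# Repeating a call with the same input shape maps back to the same hash:
print(batch1 == invocation_hash(model_hash, None, {"x": (1, 11)}, {"x": "float32"}))
```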
+ +Run the following command: + +``` +benchit scripts/multiple_invocations.py +``` + +To get a result like: +``` +Models discovered during profiling: + +multiple_invocations.py: + pytorch_model + Model Type: Pytorch (torch.nn.Module) + Class: SmallModel () + Location: /net/home/dhnoronha/mlagility/examples/cli/scripts/multiple_invocations.py, line 40 + Parameters: 60 (<0.1 MB) + + With input shape 1 (executed 2x) + Input Shape: 'x': (1, 11) + Hash: b4aa73ae + Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz (ort v1.14.1) + Mean Latency: 0.013 milliseconds (ms) + Throughput: 77909.6 inferences per second (IPS) + + With input shape 2 (executed 1x) + Input Shape: 'x': (2, 11) + Hash: cfaa2e2c + Status: Successfully benchmarked on Intel(R) Xeon(R) CPU @ 2.20GHz (ort v1.14.1) + Mean Latency: 0.015 milliseconds (ms) + Throughput: 64938.1 inferences per second (IPS) +``` + ## ONNX Benchmarking If you already happen to have an ONNX file, `benchit` can benchmark it for you. We can demonstrate this with the ONNX file in `examples/cli/onnx/sample.onnx`. diff --git a/examples/cli/scripts/multiple_invocations.py b/examples/cli/scripts/multiple_invocations.py new file mode 100644 index 00000000..6ddc7e89 --- /dev/null +++ b/examples/cli/scripts/multiple_invocations.py @@ -0,0 +1,47 @@ +# labels: name::multiple_invocations +""" +This example demonstrates what happens when your script contains +a model that is invoked multiple times with different input shapes + +To try it, run: + +benchit multiple_invocations.py + +You should see the two unique invocations being identified. +""" +import torch + +torch.manual_seed(1) + +# Define model class +class SmallModel(torch.nn.Module): + def __init__(self, input_features, output_size): + super(SmallModel, self).__init__() + self.fc = torch.nn.Linear(input_features, output_size) + + def forward(self, x): + # x has shape (batch_size, input_features) + # Set the batch size dimension to -1 to allow for flexibility + x = x.view(-1, x.size(1)) + + output = self.fc(x) + + # Reshape the output to restore the original batch size dimension + output = output.view(-1, output_size) + return output + + +# Instantiate model and generate inputs +input_features = 11 +output_size = 5 +pytorch_model = SmallModel(input_features, output_size) + +# Create 3 sets of inputs +batch_size = 1 +inputs1 = {"x": torch.rand(batch_size, input_features)} +inputs2 = {"x": torch.rand(batch_size, input_features)} +inputs3 = {"x": torch.rand(batch_size + 1, input_features)} + +pytorch_model(**inputs1) +pytorch_model(**inputs2) +pytorch_model(**inputs3) diff --git a/src/mlagility/analysis/status.py b/src/mlagility/analysis/status.py index 297625fc..1062725f 100644 --- a/src/mlagility/analysis/status.py +++ b/src/mlagility/analysis/status.py @@ -121,7 +121,7 @@ def print_invocation( if model_info.depth == 0 and len(model_info.unique_invocations) > 1: printing.logn( - f"\n{ident}\twith input shape {invocation_idx+1} (executed {unique_invocation.executed}x{exec_time})", + f"\n{ident}\tWith input shape {invocation_idx+1} (executed {unique_invocation.executed}x{exec_time})", c=printing.Colors.OKGREEN, ) From 17f0e69362672d15a5aa4dfe308d635a92f04f39 Mon Sep 17 00:00:00 2001 From: jfowers Date: Thu, 22 Jun 2023 07:56:53 -0700 Subject: [PATCH 33/35] Fix tutorial typo --- examples/cli/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cli/readme.md b/examples/cli/readme.md index 83f1ea9d..37b9a626 100644 --- a/examples/cli/readme.md +++ 
b/examples/cli/readme.md @@ -17,7 +17,7 @@ In this tutorial you will learn things such as: - [A "hello world" example, which is the easiest way to get started](#hello-world) - [Benchmarking on Nvidia GPUs](#nvidia-benchmarking) - [Working with scripts that invoke more than one model](#multiple-models-per-script) -- [Working with scripts that a model multiple times](#multiple-invocations-of-a-model) +- [Working with scripts that invoke a model multiple times](#multiple-invocations-of-a-model) - [Benchmarking an ONNX file](#onnx-benchmarking) # Just Benchmark BERT From 18aca163001b0613abb676940c42a5a78b40be38 Mon Sep 17 00:00:00 2001 From: jfowers Date: Thu, 22 Jun 2023 08:01:30 -0700 Subject: [PATCH 34/35] Copy editing the multiple invokation tutorial --- examples/cli/readme.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/cli/readme.md index 37b9a626..7a5f0547 100644 --- a/examples/cli/readme.md +++ b/examples/cli/readme.md @@ -174,9 +174,11 @@ You can see that both model instances in `two_models.py`, `pytorch_model` and `a ## Multiple Invocations of a Model -The same model may be invoked using different input shapes (e.g. when varying the batch size). Only one invocation is processed my MLAgility if the same model is invoked multiple times using inputs of the same shape. However, multiple invocation are processed my MLAgility if the same model is invoked multiple times using inputs of different shapes. +A single script may invoke the same model multiple times using different input shapes (e.g. when varying the batch size). When this happens, MLAgility will benchmark and display each of those invocations as sub-results of the same model instance. -The `multiple_invocations.py` script instantiates a single script and invokes it three times. The fist two times the model is invoked with inputs of the same shape (batch 1), while the third invocation uses a different input shape (batch 2). Note that two unique static model invocations are identified. +> **Note**: multiple invocations of a model with the same input shape will only be benchmarked once. + +The `multiple_invocations.py` script instantiates a single model and invokes it three times. The first two times the model is invoked with inputs of the same shape (batch 1), while the third invocation uses a different input shape (batch 2). Note that two unique static model invocations are identified.
Run the following command: From daf13a827156a1f09476aeb1677bd219e820dbe0 Mon Sep 17 00:00:00 2001 From: jfowers Date: Thu, 22 Jun 2023 08:08:40 -0700 Subject: [PATCH 35/35] Note issue in the code --- .github/workflows/test_mlagility.yml | 2 ++ src/mlagility/analysis/status.py | 1 + 2 files changed, 3 insertions(+) diff --git a/.github/workflows/test_mlagility.yml b/.github/workflows/test_mlagility.yml index e3b84794..89ea6e4a 100644 --- a/.github/workflows/test_mlagility.yml +++ b/.github/workflows/test_mlagility.yml @@ -56,6 +56,8 @@ jobs: rm -rf ~/.cache/mlagility benchit examples/cli/scripts/hello_world.py rm -rf ~/.cache/mlagility + benchit examples/cli/scripts/multiple_invocations.py + rm -rf ~/.cache/mlagility benchit examples/cli/scripts/max_depth.py --max-depth 1 rm -rf ~/.cache/mlagility benchit examples/cli/scripts/two_models.py diff --git a/src/mlagility/analysis/status.py b/src/mlagility/analysis/status.py index 1062725f..ec88ca56 100644 --- a/src/mlagility/analysis/status.py +++ b/src/mlagility/analysis/status.py @@ -113,6 +113,7 @@ def print_invocation( print(f"{ident}\tLocation:\t{model_info.file}, line {model_info.line}") # Converting number of parameters to MB assuming 2 bytes per parameter + # NOTE: https://github.com/groq/mlagility/issues/330 suggests eliminating this assumption model_size = model_info.params * 2 / (1024 * 1024) model_size = "{:.1f}".format(model_size) if model_size > 0.1 else "<0.1" print(