Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds support for skipping profiling if the Result is found in the checkpoint #191

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions genai-perf/genai_perf/config/generate/genai_perf_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,18 @@ def get_obj_args(self) -> Namespace:

return obj_args

###########################################################################
# Representation Methods
###########################################################################
def representation(self) -> str:
    """
    Return a string representation of the GenAI-Perf options, used to
    decide whether a previous (checkpointed) run covers this
    configuration.

    Only the input options and output-token options participate; two
    configs with equal representations are treated as interchangeable.
    """
    # str() is the idiomatic spelling of the explicit __str__() calls
    return " ".join([str(self.input), str(self.output_tokens)])

###########################################################################
# Checkpoint Methods
###########################################################################
Expand Down
38 changes: 37 additions & 1 deletion genai-perf/genai_perf/config/run/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from copy import deepcopy
from dataclasses import dataclass, field
from typing import List

Expand All @@ -18,11 +19,18 @@
from genai_perf.types import (
CheckpointObject,
GpuMetricObjectives,
ModelName,
ModelWeights,
PerfMetricObjectives,
RunConfigName,
)


@dataclass(frozen=True)
class ResultsDefaults:
    """Default constants used by Results."""

    # Sentinel used before any run-config IDs have been seen; real IDs
    # start at 0, so max() comparisons against this always lose.
    STARTING_ID = -1


@dataclass
class Results:
"""
Expand Down Expand Up @@ -80,11 +88,29 @@ def get_results_failing_constraints(self) -> "Results":

return failing_results

def get_run_config_name_based_on_representation(
    self, model_name: ModelName, representation: str
) -> RunConfigName:
    """
    Return the name of the stored RunConfig whose representation
    matches; otherwise mint a new name whose ID is one past the
    highest config ID seen so far.
    """
    highest_id = ResultsDefaults.STARTING_ID
    for config in self.run_configs:
        if config.representation() == representation:
            # Already profiled: reuse the checkpointed name.
            return config.name
        highest_id = max(highest_id, int(config.get_name_id()))

    # NOTE(review): assumes every existing name ends in an integer ID —
    # int() raises otherwise; confirm the naming convention upstream.
    return f"{model_name}_run_config_{highest_id + 1}"

###########################################################################
# Set Accessor Methods
###########################################################################
def add_run_config(self, run_config: RunConfig) -> None:
    """
    Store a defensive copy of the given RunConfig and keep the
    list sorted best-first.
    """
    # deepcopy so later caller-side mutation cannot corrupt
    # checkpointed results
    copied_config = deepcopy(run_config)
    self.run_configs.append(copied_config)
    self.run_configs.sort(reverse=True)

def set_gpu_metric_objectives(
Expand All @@ -110,3 +136,13 @@ def set_model_weighting(self, model_weights: ModelWeights) -> None:
def set_constraints(self, constraints: RunConstraints) -> None:
for run_config in self.run_configs:
run_config.set_constraints(constraints)

###########################################################################
# Misc Methods
###########################################################################
def found_representation(self, representation: str) -> bool:
    """
    Report whether any stored RunConfig shares the given
    representation (i.e. this configuration was already profiled).
    """
    return any(
        config.representation() == representation
        for config in self.run_configs
    )
36 changes: 33 additions & 3 deletions genai-perf/genai_perf/config/run/run_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ class RunConfig:
# triton_env: Dict[str, Any]
# model_run_configs: List[ModelRunConfig]

name: RunConfigName
genai_perf_config: GenAIPerfConfig
perf_analyzer_config: PerfAnalyzerConfig
measurement: RunConfigMeasurement
name: RunConfigName = ""
measurement: RunConfigMeasurement = RunConfigMeasurement()

###########################################################################
# Checkpoint Methods
Expand Down Expand Up @@ -88,7 +88,10 @@ def create_class_from_checkpoint(
)

run_config = RunConfig(
name, genai_perf_config, perf_analyzer_config, measurement
name=name,
genai_perf_config=genai_perf_config,
perf_analyzer_config=perf_analyzer_config,
measurement=measurement,
)

return run_config
Expand Down Expand Up @@ -138,6 +141,16 @@ def get_weighted_perf_metric_values(
perf_metric_name, return_value
)

def get_name_id(self) -> str:
    """
    Return the unique ID embedded in this RunConfig's name; by
    convention it is everything after the final underscore.
    """
    # rpartition finds the last "_" in one pass; when no underscore
    # exists the whole name comes back, matching split("_")[-1]
    return self.name.rpartition("_")[-1]

###########################################################################
# Set Accessor Methods
###########################################################################
Expand All @@ -164,6 +177,23 @@ def add_perf_metrics(
) -> None:
self.measurement.add_perf_metrics(model_name, perf_metrics)

###########################################################################
# Representation Methods
###########################################################################
def representation(self) -> str:
    """
    Build the string used to decide whether a previous
    (checkpointed) run matches this RunConfig's options.
    """
    parts = [
        self.perf_analyzer_config.representation(),
        self.genai_perf_config.representation(),
    ]
    return " ".join(parts)

###########################################################################
# Constraint Methods
###########################################################################
Expand Down
4 changes: 2 additions & 2 deletions genai-perf/genai_perf/measurements/run_config_measurement.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class RunConfigMeasurement:

def __init__(
self,
gpu_metrics: GpuRecords,
gpu_metrics: Optional[GpuRecords] = None,
run_constraints: Optional[RunConstraints] = None,
):
"""
Expand All @@ -82,7 +82,7 @@ def __init__(
A set of constraints (set by the user) used to determine if
this is a valid measurement
"""
self._gpu_metrics = gpu_metrics
self._gpu_metrics = gpu_metrics if gpu_metrics else {}
self._gpu_metric_objectives: Optional[GpuMetricObjectives] = (
RunConfigMeasurementDefaults.METRIC_OBJECTIVE
)
Expand Down
122 changes: 69 additions & 53 deletions genai-perf/genai_perf/subcommand/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,63 +90,79 @@ def analyze_handler(args: Namespace) -> None:
obj_args = perf_analyzer_config.get_obj_args()

#
# Create Input/Artifacts
input_config_options = create_config_options(obj_args)
create_artifacts_dirs(obj_args)
tokenizer = get_tokenizer(
obj_args.tokenizer,
obj_args.tokenizer_trust_remote_code,
obj_args.tokenizer_revision,
)
generate_inputs(input_config_options)

#
# Run PA
run_perf_analyzer(
args=obj_args,
perf_analyzer_config=perf_analyzer_config,
telemetry_data_collector=telemetry_data_collector,
)

#
# Extract Perf Metrics
infer_mode, load_level = _determine_infer_mode_and_load_level(
obj_args, objectives, model_name
)
data_parser = calculate_metrics(obj_args, tokenizer)
perf_stats = data_parser.get_statistics(infer_mode, load_level)
perf_metrics = perf_stats.create_records()

#
# Extract Telemetry Metrics
# FIXME: Once I'm able to collect telemetry records will need
# to write a method to hook this up
# telemetry_stats = (
# telemetry_data_collector.get_statistics()
# if telemetry_data_collector
# else None
# )
gpu_metrics: GpuRecords = {}

#
# Create RunConfigMeasurement
run_config_measurement = RunConfigMeasurement(gpu_metrics)
run_config_measurement.add_perf_metrics(model_name, perf_metrics)

#
# Create RunConfig
run_config_name = model_name + "_run_config_" + str(count)
run_config = RunConfig(
name=run_config_name,
# Check if this configuration has already been profiled (is in the checkpoint)
representation = RunConfig(
genai_perf_config=genai_perf_config,
perf_analyzer_config=perf_analyzer_config,
measurement=run_config_measurement,
).representation()

run_config_found = results.found_representation(representation)
run_config_name = results.get_run_config_name_based_on_representation(
model_name, representation
)

#
# Add to results and write checkpoint
results.add_run_config(run_config)
checkpoint.create_checkpoint_object()
if not run_config_found:
#
# Create Input/Artifacts
input_config_options = create_config_options(obj_args)
create_artifacts_dirs(obj_args)
tokenizer = get_tokenizer(
obj_args.tokenizer,
obj_args.tokenizer_trust_remote_code,
obj_args.tokenizer_revision,
)
generate_inputs(input_config_options)

#
# Run PA
run_perf_analyzer(
args=obj_args,
perf_analyzer_config=perf_analyzer_config,
telemetry_data_collector=telemetry_data_collector,
)

#
# Extract Perf Metrics
infer_mode, load_level = _determine_infer_mode_and_load_level(
obj_args, objectives, model_name
)
data_parser = calculate_metrics(obj_args, tokenizer)
perf_stats = data_parser.get_statistics(infer_mode, load_level)
perf_metrics = perf_stats.create_records()

#
# Extract Telemetry Metrics
# FIXME: Once I'm able to collect telemetry records will need
# to write a method to hook this up
# telemetry_stats = (
# telemetry_data_collector.get_statistics()
# if telemetry_data_collector
# else None
# )
gpu_metrics: GpuRecords = {}

#
# Create RunConfigMeasurement
run_config_measurement = RunConfigMeasurement(gpu_metrics)
run_config_measurement.add_perf_metrics(model_name, perf_metrics)

#
# Create RunConfig
run_config = RunConfig(
name=run_config_name,
genai_perf_config=genai_perf_config,
perf_analyzer_config=perf_analyzer_config,
measurement=run_config_measurement,
)

#
# Add to results and write checkpoint
results.add_run_config(run_config)
checkpoint.create_checkpoint_object()
else:
logger.info(
f"{run_config_name} found in checkpoint - skipping profiling..."
)


def _setup_config(args: Namespace) -> ConfigCommand:
Expand Down
14 changes: 14 additions & 0 deletions genai-perf/tests/test_genai_perf_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,20 @@ def test_default_config_and_objective_capture(self):
expected_output_tokens_config, self._default_genai_perf_config.output_tokens
)

###########################################################################
# Test Representation
###########################################################################
def test_representation(self):
    """
    Check that representation() joins the input and output-token
    string forms with a single space.
    """
    expected = " ".join(
        [str(ConfigInput(num_prompts=50)), str(ConfigOutputTokens())]
    )
    actual = self._default_genai_perf_config.representation()

    self.assertEqual(expected, actual)

###########################################################################
# Checkpoint Tests
###########################################################################
Expand Down
Loading
Loading