Skip to content

Commit

Permalink
[GPU] Add a new option for DumpProfilingData (openvinotoolkit#23006)
Browse files Browse the repository at this point in the history
### Details:
- Enable collecting profiling data only at iterations within the requested
range
- For example, to dump profiling data only when the iteration number is from
10 to 20, use `OV_GPU_DumpProfilingDataIteration='10..20'`

### Tickets:
 - 133260

---------

Signed-off-by: Andrew Park <[email protected]>
  • Loading branch information
andrew-k-park authored Feb 27, 2024
1 parent af5557f commit ddf7c44
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ class debug_configuration {
std::set<int64_t> dump_iteration; // Dump n-th execution of network.
std::vector<std::string> load_layers_raw_dump; // List of layers to load dumped raw binary and filenames
static const debug_configuration *get_instance();
bool is_target_dump_prof_data_iteration(int64_t iteration) const;
std::vector<std::string> get_filenames_for_matched_layer_loading_binaries(const std::string& id) const;
std::string get_name_for_dump(const std::string& file_name) const;
bool is_layer_for_dumping(const std::string& layerName, bool is_output = false, bool is_input = false) const;
Expand All @@ -154,6 +155,12 @@ class debug_configuration {
// Percentage mode preallocation
float buffers_preallocation_ratio = 0.0f;
} mem_preallocation_params;

// Iteration range parsed from OV_GPU_DumpProfilingDataIteration (e.g. '10..20'):
// profiling data is collected only for iterations in [start, end].
struct dump_profiling_data_iter_params {
bool is_enabled = false; // true when a valid range was parsed from the env var
int64_t start = 0; // first iteration to collect profiling data for (inclusive)
int64_t end = 0; // last iteration (inclusive); -1 makes the range open-ended
} dump_prof_data_iter_params;
};

} // namespace cldnn
5 changes: 4 additions & 1 deletion src/plugins/intel_gpu/src/graph/network.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,7 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
int64_t curr_iter = -1;
GPU_DEBUG_GET_INSTANCE(debug_config);
#ifdef GPU_DEBUG_CONFIG
curr_iter = iteration++;
curr_iter = iteration;
#endif

// Wait for previous execution completion
Expand Down Expand Up @@ -1175,6 +1175,9 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
GPU_DEBUG_IF(debug_config->dump_runtime_memory_pool > 0) {
get_memory_pool().dump(get_id());
}
#ifdef GPU_DEBUG_CONFIG
iteration++;
#endif
}

std::vector<primitive_id> network::get_input_ids() const {
Expand Down
51 changes: 31 additions & 20 deletions src/plugins/intel_gpu/src/graph/primitive_inst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2072,31 +2072,42 @@ bool primitive_inst::is_valid_fusion() const {
}

void primitive_inst::add_profiling_data(instrumentation::pipeline_stage stage, bool cache_hit, std::string memalloc_info, int64_t time, bool per_iter_mode) {
instrumentation::perf_counter_key key {
_network.get_input_layouts(),
_impl_params->input_layouts,
_impl_params->output_layouts,
get_implementation_name(),
stage,
GPU_DEBUG_GET_INSTANCE(debug_config);
#ifdef GPU_DEBUG_CONFIG
per_iter_mode ? get_network().get_current_iteration_num() : 0,
int64_t curr_iter = -1;
GPU_DEBUG_IF(debug_config->dump_prof_data_iter_params.is_enabled) {
curr_iter = get_network().get_current_iteration_num();
}
GPU_DEBUG_IF(curr_iter < 0 || debug_config->is_target_dump_prof_data_iteration(curr_iter)) {
#else
0,
{
#endif
cache_hit,
memalloc_info
};
instrumentation::perf_counter_key key {
_network.get_input_layouts(),
_impl_params->input_layouts,
_impl_params->output_layouts,
get_implementation_name(),
stage,
#ifdef GPU_DEBUG_CONFIG
per_iter_mode ? get_network().get_current_iteration_num() : 0,
#else
0,
#endif
cache_hit,
memalloc_info
};

auto hash = instrumentation::perf_counter_hash()(key);
auto& d = _profiling_data[hash];
if (_profiling_info.find(hash) == _profiling_info.end()) {
_profiling_info.emplace(hash, key);
}
auto hash = instrumentation::perf_counter_hash()(key);
auto& d = _profiling_data[hash];
if (_profiling_info.find(hash) == _profiling_info.end()) {
_profiling_info.emplace(hash, key);
}

auto& total_time = std::get<0>(d);
auto& total_iter = std::get<1>(d);
total_time += time;
total_iter++;
auto& total_time = std::get<0>(d);
auto& total_iter = std::get<1>(d);
total_time += time;
total_iter++;
}
}

std::string primitive_inst::get_implementation_name() const {
Expand Down
48 changes: 48 additions & 0 deletions src/plugins/intel_gpu/src/runtime/debug_configuration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ static void print_help_messages() {
message_list.emplace_back("OV_GPU_DumpProfilingData", "Enables dump of extended profiling information to specified directory."
" Please use OV_GPU_DumpProfilingDataPerIter=1 env variable to collect performance per iteration."
" Note: Performance impact may be significant as this option enforces host side sync after each primitive");
message_list.emplace_back("OV_GPU_DumpProfilingDataIteration", "Enable collecting profiling data only at iterations with requested range. "
"For example for dump profiling data only when iteration is from 10 to 20, you can use "
"OV_GPU_DumpProfilingDataIteration='10..20'. Additionally, you can dump profiling data only "
"from one specific iteration by giving the same values for the start and end, and the open "
"ended range is also available by range from given start to the last iteration as -1. e.g. "
"OV_GPU_DumpProfilingDataIteration='10..-1'");
message_list.emplace_back("OV_GPU_DumpGraphs", "1) dump ngraph before and after transformation. 2) dump graph in model compiling."
"3) dump graph in execution.");
message_list.emplace_back("OV_GPU_DumpSources", "Dump opencl sources");
Expand Down Expand Up @@ -227,6 +233,8 @@ debug_configuration::debug_configuration()
get_gpu_debug_env_var("DisableOnednnOptPostOps", disable_onednn_opt_post_ops);
get_gpu_debug_env_var("DumpProfilingData", dump_profiling_data);
get_gpu_debug_env_var("DumpProfilingDataPerIter", dump_profiling_data_per_iter);
std::string dump_prof_data_iter_str;
get_gpu_debug_env_var("DumpProfilingDataIteration", dump_prof_data_iter_str);
get_gpu_debug_env_var("DryRunPath", dry_run_path);
get_gpu_debug_env_var("DumpRuntimeMemoryPool", dump_runtime_memory_pool);
get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation);
Expand Down Expand Up @@ -259,6 +267,28 @@ debug_configuration::debug_configuration()
exit(0);
}

if (dump_prof_data_iter_str.length() > 0) {
dump_prof_data_iter_str = " " + dump_prof_data_iter_str + " ";
std::istringstream iss(dump_prof_data_iter_str);
char dot;
int64_t start, end;
bool is_valid_range = false;
if (iss >> start >> dot >> dot >> end) {
if (start <= end || end == -1) {
try {
is_valid_range = true;
dump_prof_data_iter_params.start = start;
dump_prof_data_iter_params.end = end;
} catch(const std::exception& ex) {
is_valid_range = false;
}
}
}
if (!is_valid_range)
std::cout << "OV_GPU_DumpProfilingDataIteration was ignored. It cannot be parsed to valid iteration range." << std::endl;
dump_prof_data_iter_params.is_enabled = is_valid_range;
}

if (dump_layers_str.length() > 0) {
// Insert delimiter for easier parsing when used
dump_layers_str = " " + dump_layers_str + " ";
Expand Down Expand Up @@ -357,6 +387,24 @@ const debug_configuration *debug_configuration::get_instance() {
#endif
}

// Returns true when profiling data should be collected for the given iteration.
// A negative iteration means "not tracked" and is always accepted. Otherwise the
// iteration must fall inside [start, end] from OV_GPU_DumpProfilingDataIteration;
// a range with end < start (e.g. end == -1) is treated as open-ended.
bool debug_configuration::is_target_dump_prof_data_iteration(int64_t iteration) const {
#ifdef GPU_DEBUG_CONFIG
    // Unknown/untracked iteration — always collect.
    if (iteration < 0)
        return true;

    const auto& range = dump_prof_data_iter_params;

    // Before the start of the requested window.
    if (iteration < range.start)
        return false;

    // Past the end of a closed window (an open-ended range skips this check).
    const bool is_closed_range = (range.start <= range.end);
    if (is_closed_range && iteration > range.end)
        return false;

    return true;
#else
    return false;
#endif
}

std::vector<std::string> debug_configuration::get_filenames_for_matched_layer_loading_binaries(const std::string& id) const {
std::vector<std::string> file_names;
#ifdef GPU_DEBUG_CONFIG
Expand Down

0 comments on commit ddf7c44

Please sign in to comment.