-
Notifications
You must be signed in to change notification settings - Fork 47
/
Copy pathgpa_profiler.h
183 lines (134 loc) · 5.98 KB
/
gpa_profiler.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
//==============================================================================
// Copyright (c) 2016-2021 Advanced Micro Devices, Inc. All rights reserved.
/// @author AMD Developer Tools Team
/// @file
/// @brief Internal class to support profiling GPA calls themselves.
//==============================================================================
#ifndef GPU_PERF_API_COMMON_GPA_PROFILER_H_
#define GPU_PERF_API_COMMON_GPA_PROFILER_H_
// To use, add this header and the GPAProfiler.cpp file to your project. Use the PROFILE_FUNCTION() macro,
// giving the name of the function as a parameter, at the very beginning of each function to include in profiling.
// Use START_PROFILING() to begin measurements, and STOP_PROFILING() to finish.
// Use WRITE_PROFILE_REPORT(filename) to write a text report to the specified filename. A csv extension is a good choice.
// Define ENABLE_PROFILING for all projects which reference the profiler.
// Note: currently this is written to work for single-threaded apps without access to QueryPerformanceCounter.
// For multi-threaded apps with access to QPC, a QPC version of the timing code should be added.
// The results will look similar to the following:
// Time profiling = 0.762326(2053145454)
// Total time in functions = 0.501066(1349502989)
// % time in functions = 65.7286
// Timing errors = 0
// Function # of calls in % of total time total % of total time total time time per call total time in time in per call
// GpaBeginPass 9 0.628688 0.628688 0.00315014(8484159) 0.000350016(942684) 0.00315014(8484159) 0.000350016(942684)
// GpaBeginSample 1647 0.440372 0.440372 0.00220656(5942836) 1.33964e-006(3608) 0.00220656(5942836) 1.33964e-006(3608)
// GPA_BeginSession 1 5.65964 5.68611 0.0284912(76734166) 0.0284912(76734166) 0.0283585(76376984) 0.0283585(76376984)
// Description of output:
// All numbers not in brackets are in seconds.
// Function: name of function profiled
// # of calls: number of times the function was called
// in % of total time: % of total time in profiled functions which was spent inside this function (not including any profiled
// functions called by it). This is usually the primary sort key since it gives the functions where the most
// time was spent.
// total % of total time: % of total time in profiled functions, which was spent in this function or any functions it called.
// total time: total time spent in the function (includes time spent in all functions it called).
// time per call: average time spent in the function per call (includes time spent in all functions it called).
// total time in: total time spent within this function, not including time spent in profiled functions called by it.
// time in per call: average time spent within this function per call, not including time spent in profiled functions called by it.
#ifdef ENABLE_PROFILING
#include <windows.h>
#include <map>
#include <mutex>
#include <sstream>
#include <string>
#include <vector>
// These macros refer to a singleton profiling object (profiler_singleton) defined in GPAProfiler.cpp.
// Each macro that takes a function name stringizes its argument with #func, so pass the bare identifier, not a string.
/// Macro to use a scope-bound object to profile a function.
/// Declares a local variable named temp_scope_profile_object, so it can appear only once per scope.
#define PROFILE_FUNCTION(func) ScopeProfile temp_scope_profile_object(#func)
/// Macro to begin profiling a section. Calls EnterFunction() on the singleton directly and yields its bool result.
#define BEGIN_PROFILE_SECTION(func) (profiler_singleton.EnterFunction(#func))
/// Macro to end profiling a section. Calls LeaveFunction() on the singleton directly and yields its bool result.
#define END_PROFILE_SECTION(func) (profiler_singleton.LeaveFunction(#func))
/// Macro to start profiling.
#define START_PROFILING() (profiler_singleton.Start())
/// Macro to stop profiling.
#define STOP_PROFILING() (profiler_singleton.Stop())
/// Macro to check if profiling is active. Yields the bool returned by Profiler::Active().
#define IS_PROFILING() (profiler_singleton.Active())
/// Macro to generate a profile report. Yields the std::string returned by Profiler::GenerateReport().
#define GENERATE_PROFILE_REPORT() (profiler_singleton.GenerateReport())
/// Macro to write a profile report to the file named by file_name.
#define WRITE_PROFILE_REPORT(file_name) (profiler_singleton.WriteReport(file_name))
/// @brief Per-function statistics record used by the profiler.
///
/// Every member starts at zero, so a default-initialized record (for example a local
/// `FunctionInfo info;`) never holds indeterminate values.
class FunctionInfo
{
public:
    /// Number of times the function was called (the "# of calls" column of the report).
    unsigned int calls = 0;

    /// Accumulated time attributed to the function.
    /// NOTE(review): presumably raw timer ticks, including time in profiled callees - confirm in the profiler implementation.
    __int64 total_time_ = 0;

    /// Accumulated time spent below this function.
    /// NOTE(review): presumably the time spent in profiled functions called by this one - confirm in the profiler implementation.
    __int64 time_below_ = 0;
};
/// @brief Internal class to support profiling GPA calls themselves.
///
/// Only Active() is defined in this header; every other member function is defined elsewhere.
/// Code normally reaches this class through the profiling macros, which operate on profiler_singleton.
class Profiler
{
public:
/// @brief Constructor (defined elsewhere).
Profiler();
/// @brief Initializes the profiler.
/// @return NOTE(review): presumably true on success - confirm in the implementation.
bool Init();
/// @brief Starts profiling (invoked by START_PROFILING()).
void Start();
/// @brief Stops profiling (invoked by STOP_PROFILING()).
void Stop();
/// @brief Resets the profiler. NOTE(review): presumably clears the collected data - confirm in the implementation.
void Reset();
/// @brief Checks whether profiling is active.
/// @return The current value of is_active_.
bool Active() const
{
return is_active_;
}
/// @brief Records entry into a profiled function or section (invoked by BEGIN_PROFILE_SECTION() and ScopeProfile).
/// @param [in] function_name Name of the function or section being entered.
/// @return NOTE(review): meaning of the result is not visible here - confirm in the implementation.
bool EnterFunction(const char* function_name);
/// @brief Records exit from a profiled function or section (invoked by END_PROFILE_SECTION() and ScopeProfile).
/// @param [in] function_name Name of the function or section being left.
/// @return NOTE(review): meaning of the result is not visible here - confirm in the implementation.
bool LeaveFunction(const char* function_name);
/// @brief Gets the statistics record for a function.
/// @param [in] function_name Name of the function to look up.
/// @return Reference to a FunctionInfo. NOTE(review): presumably the entry stored in function_map_ - confirm.
FunctionInfo& GetFunctionInfo(const char* function_name);
/// @brief Generates the profile report (invoked by GENERATE_PROFILE_REPORT()).
/// @return The report as a string.
std::string GenerateReport();
/// @brief Writes the profile report (invoked by WRITE_PROFILE_REPORT()).
/// @param [in] file_name Name of the file to write the report to.
void WriteReport(std::string file_name);
protected:
/// @brief Writes a time value to the given string stream.
/// @param [in,out] ss Stream that receives the formatted output.
/// @param [in] time Time value to output. NOTE(review): presumably in timer ticks - confirm.
void OutputTime(std::stringstream& ss, __int64 time) const;
__int64 rdtsc_ticks_per_second_;  ///< NOTE(review): presumably the timer frequency used to convert ticks to seconds - confirm.
std::vector<__int64> started_timestamps_;  ///< NOTE(review): presumably a stack of entry timestamps for the active calls - confirm.
std::vector<__int64> total_time_below_parent_;  ///< NOTE(review): presumably a parallel stack of time spent in callees - confirm.
std::map<std::string, FunctionInfo> function_map_;  ///< Statistics records keyed by function name.
__int64 total_time_;  ///< NOTE(review): presumably the "Total time in functions" value of the report - confirm.
unsigned int timing_errors_;  ///< NOTE(review): presumably the "Timing errors" count of the report - confirm.
bool is_active_;  ///< Value returned by Active().
__int64 start_time_;  ///< NOTE(review): presumably the timestamp taken by Start() - confirm.
__int64 stop_time_;  ///< NOTE(review): presumably the timestamp taken by Stop() - confirm.
};
/// The profiler object used by all of the profiling macros; declared here and defined elsewhere.
extern Profiler profiler_singleton;
/// @brief Scope-bound helper that reports entry into a profiled function when it is constructed
/// and the matching exit when it is destroyed.
///
/// Instances are normally created through the PROFILE_FUNCTION() macro. The calls made by this
/// class into profiler_singleton are serialized through a single mutex shared by all ScopeProfile
/// instances. Direct calls on profiler_singleton (for example through BEGIN_PROFILE_SECTION())
/// do not take this lock.
class ScopeProfile
{
public:
    /// @brief Reports to the profiler singleton that the named function has been entered.
    ///
    /// @param [in] function_name Name of the function being profiled. Only the pointer is stored,
    /// so the string must outlive this object (PROFILE_FUNCTION() passes a string literal).
    ScopeProfile(const char* function_name)
        : function_name_(function_name)
    {
        // The mutex has to be shared between instances: a per-object mutex is only ever locked
        // by its own object and therefore serializes nothing.
        std::lock_guard<std::mutex> lock(ProfilerMutex());
        profiler_singleton.EnterFunction(function_name_);
    }

    /// @brief Reports to the profiler singleton that the function has been left.
    ~ScopeProfile()
    {
        // Guard the exit notification with the same lock as the entry notification.
        std::lock_guard<std::mutex> lock(ProfilerMutex());
        profiler_singleton.LeaveFunction(function_name_);
    }

    // Not copyable: a copy would report the same function exit a second time.
    ScopeProfile(const ScopeProfile&) = delete;
    ScopeProfile& operator=(const ScopeProfile&) = delete;

protected:
    /// @brief Gets the mutex shared by every ScopeProfile instance.
    /// @return Reference to the function-local static mutex.
    static std::mutex& ProfilerMutex()
    {
        static std::mutex gpa_profiler_mutex;
        return gpa_profiler_mutex;
    }

    const char* function_name_;  ///< Name passed to EnterFunction() and LeaveFunction().
};
#else
// Profiling is disabled (ENABLE_PROFILING is not defined): every macro below expands to nothing,
// except IS_PROFILING(), which expands to (false).
/// Macro to use a scope-bound object to profile a function. Expands to nothing in this configuration.
#define PROFILE_FUNCTION(func)
/// Macro to begin profiling a section. Expands to nothing in this configuration.
#define BEGIN_PROFILE_SECTION(func)
/// Macro to end profiling a section. Expands to nothing in this configuration.
#define END_PROFILE_SECTION(func)
/// Macro to start profiling. Expands to nothing in this configuration.
#define START_PROFILING()
/// Macro to stop profiling. Expands to nothing in this configuration.
#define STOP_PROFILING()
/// Macro to check if profiling is active. Always false in this configuration.
#define IS_PROFILING() (false)
/// Macro to generate a profile report. Expands to nothing in this configuration, so it yields no value.
#define GENERATE_PROFILE_REPORT()
/// Macro to write a profile report. Expands to nothing in this configuration.
#define WRITE_PROFILE_REPORT(file_name)
#endif
#endif // GPU_PERF_API_COMMON_GPA_PROFILER_H_