
Commit

fix pre-commit
Remove duplicates, unnecessary spaces
lkomali committed Oct 31, 2024
1 parent 855f11b commit d8628bf
Showing 6 changed files with 128 additions and 144 deletions.
50 changes: 13 additions & 37 deletions src/command_line_parser.cc
@@ -1767,8 +1767,6 @@ CLParser::ParseCommandLine(int argc, char** argv)
params_->search_mode = SearchMode::NONE;
}

- // When the request-count feature is enabled, override the measurement mode to
- // be count windows with a window size of the requested count
// When the request-count feature is enabled, override the measurement mode to
// be count windows with a window size of the requested count
if (params_->request_count) {
@@ -2017,43 +2015,22 @@ CLParser::VerifyOptions()
"--triton-server-path should not be empty when using "
"service-kind=triton_c_api.");
}
- if (params_->kind == cb::BackendKind::TRITON_C_API) {
- if (params_->triton_server_path.empty()) {
- Usage(
- "--triton-server-path should not be empty when using "
- "service-kind=triton_c_api.");
- }

- if (params_->model_repository_path.empty()) {
- Usage(
- "--model-repository should not be empty when using "
- "service-kind=triton_c_api.");
- }
+ if (params_->model_repository_path.empty()) {
+ Usage(
+ "--model-repository should not be empty when using "
+ "service-kind=triton_c_api.");
+ }

- // Decoupled models run via Triton C API do not support shared memory
- if (params_->async && params_->streaming &&
- params_->shared_memory_type != SharedMemoryType::NO_SHARED_MEMORY) {
- Usage(
- "Cannot use --shared-memory=system or --shared-memory=cuda "
- "with "
- "--service-kind=triton_c_api and --async and --streaming.");
- }
+ // Decoupled models run via Triton C API do not support shared memory
+ if (params_->async && params_->streaming &&
+ params_->shared_memory_type != SharedMemoryType::NO_SHARED_MEMORY) {
+ Usage(
+ "Cannot use --shared-memory=system or --shared-memory=cuda "
+ "with "
+ "--service-kind=triton_c_api and --async and --streaming.");
+ }
- if (params_->model_repository_path.empty()) {
- Usage(
- "--model-repository should not be empty when using "
- "service-kind=triton_c_api.");
- }

- params_->protocol = cb::ProtocolType::UNKNOWN;
- // Decoupled models run via Triton C API do not support shared memory
- if (params_->async && params_->streaming &&
- params_->shared_memory_type != SharedMemoryType::NO_SHARED_MEMORY) {
- Usage(
- "Cannot use --shared-memory=system or --shared-memory=cuda "
- "with "
- "--service-kind=triton_c_api and --async and --streaming.");
- }

params_->protocol = cb::ProtocolType::UNKNOWN;
}

@@ -2106,5 +2083,4 @@ CLParser::VerifyOptions()
}
}

-
}} // namespace triton::perfanalyzer
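
Taken together, the surviving lines above collapse three duplicated validation blocks into one. As a reading aid, here is a condensed sketch of the triton_c_api branch of VerifyOptions after this commit, assembled from the kept lines; the enclosing service-kind condition is inferred from the removed duplicate, so treat the exact nesting as an assumption rather than a quote of the file:

if (params_->kind == cb::BackendKind::TRITON_C_API) {
  // Require an explicit server library path for the C API backend.
  if (params_->triton_server_path.empty()) {
    Usage(
        "--triton-server-path should not be empty when using "
        "service-kind=triton_c_api.");
  }

  // Require a local model repository, since no remote server is involved.
  if (params_->model_repository_path.empty()) {
    Usage(
        "--model-repository should not be empty when using "
        "service-kind=triton_c_api.");
  }

  // Decoupled models run via Triton C API do not support shared memory.
  if (params_->async && params_->streaming &&
      params_->shared_memory_type != SharedMemoryType::NO_SHARED_MEMORY) {
    Usage(
        "Cannot use --shared-memory=system or --shared-memory=cuda with "
        "--service-kind=triton_c_api and --async and --streaming.");
  }

  // No network protocol applies when running in-process.
  params_->protocol = cb::ProtocolType::UNKNOWN;
}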
77 changes: 42 additions & 35 deletions src/custom_request_schedule_manager.cc
@@ -27,43 +27,49 @@
#include "custom_request_schedule_manager.h"

namespace triton { namespace perfanalyzer {
+
- cb::Error CustomRequestScheduleManager::Create(
- const bool async, const bool streaming,
- const uint64_t measurement_window_ms, const size_t max_trials,
- const std::vector<float>& schedule, const int32_t batch_size,
- const size_t max_threads, const uint32_t num_of_sequences,
- const SharedMemoryType shared_memory_type, const size_t output_shm_size,
- const bool serial_sequences, const std::shared_ptr<ModelParser>& parser,
- const std::shared_ptr<cb::ClientBackendFactory>& factory,
- std::unique_ptr<LoadManager>* manager,
- const std::unordered_map<std::string, cb::RequestParameter>& request_parameters)
+ cb::Error
+ CustomRequestScheduleManager::Create(
+ const bool async, const bool streaming,
+ const uint64_t measurement_window_ms, const size_t max_trials,
+ const std::vector<float>& schedule, const int32_t batch_size,
+ const size_t max_threads, const uint32_t num_of_sequences,
+ const SharedMemoryType shared_memory_type, const size_t output_shm_size,
+ const bool serial_sequences, const std::shared_ptr<ModelParser>& parser,
+ const std::shared_ptr<cb::ClientBackendFactory>& factory,
+ std::unique_ptr<LoadManager>* manager,
+ const std::unordered_map<std::string, cb::RequestParameter>&
+ request_parameters)
{
- std::unique_ptr<CustomRequestScheduleManager> local_manager(new CustomRequestScheduleManager(
- async, streaming, measurement_window_ms, max_trials, schedule, batch_size,
- max_threads, num_of_sequences, shared_memory_type,
- output_shm_size, serial_sequences, parser, factory, request_parameters));
+ std::unique_ptr<CustomRequestScheduleManager> local_manager(
+ new CustomRequestScheduleManager(
+ async, streaming, measurement_window_ms, max_trials, schedule,
+ batch_size, max_threads, num_of_sequences, shared_memory_type,
+ output_shm_size, serial_sequences, parser, factory,
+ request_parameters));

*manager = std::move(local_manager);

return cb::Error::Success;
}

CustomRequestScheduleManager::CustomRequestScheduleManager(
- const bool async, const bool streaming,
- const uint64_t measurement_window_ms, const size_t max_trials,
- const std::vector<float>& schedule, const int32_t batch_size,
- const size_t max_threads, const uint32_t num_of_sequences,
- const SharedMemoryType shared_memory_type, const size_t output_shm_size,
- const bool serial_sequences, const std::shared_ptr<ModelParser>& parser,
- const std::shared_ptr<cb::ClientBackendFactory>& factory,
- const std::unordered_map<std::string, cb::RequestParameter>& request_parameters)
- : RequestRateManager(
- async, streaming, Distribution::CUSTOM, batch_size,
- measurement_window_ms, max_trials, max_threads, num_of_sequences,
- shared_memory_type, output_shm_size, serial_sequences, parser,
- factory, request_parameters), schedule_(schedule)
- {
+ const bool async, const bool streaming,
+ const uint64_t measurement_window_ms, const size_t max_trials,
+ const std::vector<float>& schedule, const int32_t batch_size,
+ const size_t max_threads, const uint32_t num_of_sequences,
+ const SharedMemoryType shared_memory_type, const size_t output_shm_size,
+ const bool serial_sequences, const std::shared_ptr<ModelParser>& parser,
+ const std::shared_ptr<cb::ClientBackendFactory>& factory,
+ const std::unordered_map<std::string, cb::RequestParameter>&
+ request_parameters)
+ : RequestRateManager(
+ async, streaming, Distribution::CUSTOM, batch_size,
+ measurement_window_ms, max_trials, max_threads, num_of_sequences,
+ shared_memory_type, output_shm_size, serial_sequences, parser,
+ factory, request_parameters),
+ schedule_(schedule)
+ {
}

cb::Error
@@ -90,7 +96,9 @@ CustomRequestScheduleManager::ChangeRequestRate(
return cb::Error::Success;
}

- void CustomRequestScheduleManager::GenerateSchedule(const double request_rate, const std::vector<float>& schedule)
+ void
+ CustomRequestScheduleManager::GenerateSchedule(
+ const double request_rate, const std::vector<float>& schedule)
{
std::vector<float> scaled_schedule;
scaled_schedule.reserve(schedule.size());
@@ -99,8 +107,7 @@ void CustomRequestScheduleManager::GenerateSchedule(const double request_rate, c
scaled_schedule.push_back(value / static_cast<float>(request_rate));
}
}
- auto worker_schedules =
- CreateWorkerSchedules(schedule);
+ auto worker_schedules = CreateWorkerSchedules(schedule);
GiveSchedulesToWorkers(worker_schedules);
}

@@ -109,15 +116,15 @@ CustomRequestScheduleManager::CreateWorkerSchedules(
const std::vector<float>& schedule)
{
std::vector<RateSchedulePtr_t> worker_schedules =
- CreateEmptyWorkerSchedules();
+ CreateEmptyWorkerSchedules();
std::vector<size_t> thread_ids{CalculateThreadIds()};
std::chrono::nanoseconds next_timestamp(0);
size_t thread_id_index = 0;
size_t worker_index = 0;

for (const float& val : schedule) {
next_timestamp = std::chrono::duration_cast<std::chrono::nanoseconds>(
- std::chrono::duration<float>(val));
+ std::chrono::duration<float>(val));
worker_index = thread_ids[thread_id_index];
thread_id_index = ++thread_id_index % thread_ids.size();
worker_schedules[worker_index]->intervals.emplace_back(next_timestamp);
@@ -127,4 +134,4 @@ CustomRequestScheduleManager::CreateWorkerSchedules(
return worker_schedules;
}

- }}
+ }} // namespace triton::perfanalyzer
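
The scheduling logic in this file is easiest to see end to end. The following is a minimal, self-contained C++ sketch of what GenerateSchedule and CreateWorkerSchedules do together: scale each schedule entry by the request rate, convert the result to a nanosecond timestamp, and deal the timestamps round-robin across workers. The worker count, request rate, and container shapes are assumptions chosen for illustration; the real class uses RateSchedulePtr_t and thread-id bookkeeping that are elided here:

#include <chrono>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
  // Assumed example inputs: the schedule from the class comment, an
  // illustrative request rate, and two workers.
  const std::vector<float> schedule{1.0f, 2.0f, 4.0f, 5.0f, 6.5f};
  const double request_rate = 2.0;
  const std::size_t num_workers = 2;

  // Mirrors GenerateSchedule: divide each entry by the request rate.
  std::vector<float> scaled;
  scaled.reserve(schedule.size());
  for (const float value : schedule) {
    scaled.push_back(value / static_cast<float>(request_rate));
  }

  // Mirrors CreateWorkerSchedules: convert each scaled entry to a
  // nanosecond timestamp and assign it to the next worker in turn.
  std::vector<std::vector<std::chrono::nanoseconds>> worker_schedules(
      num_workers);
  std::size_t worker_index = 0;
  for (const float val : scaled) {
    auto ts = std::chrono::duration_cast<std::chrono::nanoseconds>(
        std::chrono::duration<float>(val));
    worker_schedules[worker_index].push_back(ts);
    worker_index = (worker_index + 1) % num_workers;
  }

  for (std::size_t w = 0; w < num_workers; ++w) {
    std::cout << "worker " << w << ":";
    for (const auto& ts : worker_schedules[w]) {
      std::cout << ' ' << ts.count() << "ns";
    }
    std::cout << '\n';
  }
  return 0;
}

One detail of the diff worth noting: GenerateSchedule builds scaled_schedule but then passes the original schedule to CreateWorkerSchedules; the sketch above follows the apparent intent of the scaling step.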
142 changes: 73 additions & 69 deletions src/custom_request_schedule_manager.h
@@ -29,42 +29,43 @@
#include "request_rate_manager.h"

namespace triton { namespace perfanalyzer {
+
//==============================================================================
/// CustomRequestScheduleManager is a helper class to send inference requests to
/// inference server in accordance with the schedule set by the user.
///
/// Detail:
/// An instance of this load manager will be created at the beginning of the
/// perf analyzer and it will be used to schedule to send requests at that
- /// particular second defined by the user. The particular seconds at which a
+ /// particular second defined by the user. The particular seconds at which a
/// request should be sent can be set by the user using the `schedule` option.
- /// For example, if the `schedule` is set to 1,2,4,5,6.5, CustomRequestScheduleManager
- /// sends request at 1st second, 2nd second, 4th second and so on.
- ///
+ /// For example, if the `schedule` is set to 1,2,4,5,6.5,
+ /// CustomRequestScheduleManager sends request at 1st second, 2nd second, 4th
+ /// second and so on.
+ ///

class CustomRequestScheduleManager : public RequestRateManager {
- public:
- ~CustomRequestScheduleManager() = default;
+ public:
+ ~CustomRequestScheduleManager() = default;

- /// Creates an object of CustomRequestScheduleManager
- /// \param async Whether to use asynchronous or synchronous API for infer request
- /// \param streaming Whether to use gRPC streaming API for infer request
- /// \param measurement_window_ms The time window for measurements
- /// \param max_trials The maximum number of windows that will be measured
- /// \param schedule The vector containing the schedule for requests
- /// \param batch_size The batch size used for each request
- /// \param max_threads The maximum number of working threads to be spawned
- /// \param num_of_sequences The number of concurrent sequences to maintain on the server
- /// \param shared_memory_type The type of shared memory to use for inputs
- /// \param output_shm_size The size of the shared memory to allocate for the output
- /// \param serial_sequences Enable serial sequence mode
- /// \param parser The ModelParser object to get the model details
- /// \param factory The ClientBackendFactory object used to create client to the server
- /// \param manager Returns a new CustomRequestScheduleManager object
- /// \param request_parameters Custom request parameters to send to the server
- /// \return cb::Error object indicating success or failure
- static cb::Error Create(
+ /// Creates an object of CustomRequestScheduleManager
+ /// \param async Whether to use asynchronous or synchronous API for infer
+ /// request \param streaming Whether to use gRPC streaming API for infer
+ /// request \param measurement_window_ms The time window for measurements
+ /// \param max_trials The maximum number of windows that will be measured
+ /// \param schedule The vector containing the schedule for requests
+ /// \param batch_size The batch size used for each request
+ /// \param max_threads The maximum number of working threads to be spawned
+ /// \param num_of_sequences The number of concurrent sequences to maintain on
+ /// the server \param shared_memory_type The type of shared memory to use for
+ /// inputs \param output_shm_size The size of the shared memory to allocate
+ /// for the output \param serial_sequences Enable serial sequence mode \param
+ /// parser The ModelParser object to get the model details \param factory The
+ /// ClientBackendFactory object used to create client to the server \param
+ /// manager Returns a new CustomRequestScheduleManager object \param
+ /// request_parameters Custom request parameters to send to the server \return
+ /// cb::Error object indicating success or failure
+ static cb::Error Create(
const bool async, const bool streaming,
const uint64_t measurement_window_ms, const size_t max_trials,
const std::vector<float>& schedule, const int32_t batch_size,
@@ -74,39 +75,41 @@ class CustomRequestScheduleManager : public RequestRateManager {
const std::shared_ptr<cb::ClientBackendFactory>& factory,
std::unique_ptr<LoadManager>* manager,
const std::unordered_map<std::string, cb::RequestParameter>&
- request_parameters);
-
- /// Performs warmup for benchmarking by sending a fixed number of requests
- /// according to the specified request rate
- /// \param request_rate The rate at which requests must be issued to the server
- /// \param warmup_request_count The number of warmup requests to send
- /// \return cb::Error object indicating success or failure
- cb::Error PerformWarmup(double request_rate, size_t warmup_request_count) override;
+ request_parameters);
+
+ /// Performs warmup for benchmarking by sending a fixed number of requests
+ /// according to the specified request rate
+ /// \param request_rate The rate at which requests must be issued to the
+ /// server \param warmup_request_count The number of warmup requests to send
+ /// \return cb::Error object indicating success or failure
+ cb::Error PerformWarmup(
+ double request_rate, size_t warmup_request_count) override;

- /// Adjusts the rate of issuing requests to be the same as 'request_rate'
- /// \param request_rate The rate at which requests must be issued to the server
- /// \param request_count The number of requests to generate when profiling
- /// \return cb::Error object indicating success or failure
- cb::Error ChangeRequestRate(const double request_rate, const size_t request_count) override;
+ /// Adjusts the rate of issuing requests to be the same as 'request_rate'
+ /// \param request_rate The rate at which requests must be issued to the
+ /// server \param request_count The number of requests to generate when
+ /// profiling \return cb::Error object indicating success or failure
+ cb::Error ChangeRequestRate(
+ const double request_rate, const size_t request_count) override;

- protected:
- /// Constructor for CustomRequestScheduleManager
- /// \param async Whether to use asynchronous or synchronous API for infer request
- /// \param streaming Whether to use gRPC streaming API for infer request
- /// \param measurement_window_ms The time window for measurements
- /// \param max_trials The maximum number of windows that will be measured
- /// \param schedule The vector containing the schedule for requests
- /// \param batch_size The batch size used for each request
- /// \param max_threads The maximum number of working threads to be spawned
- /// \param num_of_sequences The number of concurrent sequences to maintain on the server
- /// \param shared_memory_type The type of shared memory to use for inputs
- /// \param output_shm_size The size of the shared memory to allocate for the output
- /// \param serial_sequences Enable serial sequence mode
- /// \param parser The ModelParser object to get the model details
- /// \param factory The ClientBackendFactory object used to create client to the server
- /// \param manager Returns a new CustomRequestScheduleManager object
- /// \param request_parameters Custom request parameters to send to the server
- CustomRequestScheduleManager(
+ protected:
+ /// Constructor for CustomRequestScheduleManager
+ /// \param async Whether to use asynchronous or synchronous API for infer
+ /// request \param streaming Whether to use gRPC streaming API for infer
+ /// request \param measurement_window_ms The time window for measurements
+ /// \param max_trials The maximum number of windows that will be measured
+ /// \param schedule The vector containing the schedule for requests
+ /// \param batch_size The batch size used for each request
+ /// \param max_threads The maximum number of working threads to be spawned
+ /// \param num_of_sequences The number of concurrent sequences to maintain on
+ /// the server \param shared_memory_type The type of shared memory to use for
+ /// inputs \param output_shm_size The size of the shared memory to allocate
+ /// for the output \param serial_sequences Enable serial sequence mode \param
+ /// parser The ModelParser object to get the model details \param factory The
+ /// ClientBackendFactory object used to create client to the server \param
+ /// manager Returns a new CustomRequestScheduleManager object \param
+ /// request_parameters Custom request parameters to send to the server
+ CustomRequestScheduleManager(
const bool async, const bool streaming,
const uint64_t measurement_window_ms, const size_t max_trials,
const std::vector<float>& schedule, const int32_t batch_size,
@@ -115,22 +118,23 @@ class CustomRequestScheduleManager : public RequestRateManager {
const bool serial_sequences, const std::shared_ptr<ModelParser>& parser,
const std::shared_ptr<cb::ClientBackendFactory>& factory,
const std::unordered_map<std::string, cb::RequestParameter>&
- request_parameters);
+ request_parameters);

- /// Generates and updates the request schedule as per the given request rate and schedule
- /// \param request_rate The request rate to use for new schedule
- /// \param schedule The vector containing the schedule for requests
- void GenerateSchedule(const double request_rate, const std::vector<float>& schedule);
+ /// Generates and updates the request schedule as per the given request rate
+ /// and schedule \param request_rate The request rate to use for new schedule
+ /// \param schedule The vector containing the schedule for requests
+ void GenerateSchedule(
+ const double request_rate, const std::vector<float>& schedule);

- /// Creates worker schedules based on the provided schedule
- /// \param duration The maximum duration for the schedule
- /// \param schedule The vector containing the schedule for requests
- /// \return A vector of RateSchedulePtr_t representing the worker schedules
- std::vector<RateSchedulePtr_t> CreateWorkerSchedules(
+ /// Creates worker schedules based on the provided schedule
+ /// \param duration The maximum duration for the schedule
+ /// \param schedule The vector containing the schedule for requests
+ /// \return A vector of RateSchedulePtr_t representing the worker schedules
+ std::vector<RateSchedulePtr_t> CreateWorkerSchedules(
const std::vector<float>& schedule);

- /// The vector containing the schedule for requests
- std::vector<float> schedule_;
+ /// The vector containing the schedule for requests
+ std::vector<float> schedule_;
};

- }}
+ }} // namespace triton::perfanalyzer
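
For orientation, here is a hypothetical helper showing one way the Create factory declared above could be invoked. The function name and every argument value are illustrative assumptions; parser and factory are expected to come from perf analyzer's existing setup code:

// Hypothetical helper (not part of the commit): constructs a manager for
// the example schedule from the class comment. All values are illustrative.
static cb::Error
BuildCustomScheduleManager(
    const std::shared_ptr<ModelParser>& parser,
    const std::shared_ptr<cb::ClientBackendFactory>& factory,
    std::unique_ptr<LoadManager>* manager)
{
  return CustomRequestScheduleManager::Create(
      /*async=*/true, /*streaming=*/false,
      /*measurement_window_ms=*/5000, /*max_trials=*/10,
      /*schedule=*/{1.0f, 2.0f, 4.0f, 5.0f, 6.5f}, /*batch_size=*/1,
      /*max_threads=*/4, /*num_of_sequences=*/1,
      SharedMemoryType::NO_SHARED_MEMORY, /*output_shm_size=*/0,
      /*serial_sequences=*/false, parser, factory, manager,
      /*request_parameters=*/{});
}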
1 change: 0 additions & 1 deletion src/inference_profiler.cc
@@ -672,7 +672,6 @@ InferenceProfiler::Profile(
return cb::Error::Success;
}

-
cb::Error
InferenceProfiler::Profile(
size_t warmup_request_count, const size_t request_count,
1 change: 0 additions & 1 deletion src/inference_profiler.h
@@ -48,7 +48,6 @@
#include "profile_data_collector.h"
#include "request_rate_manager.h"


namespace triton { namespace perfanalyzer {

#ifndef DOCTEST_CONFIG_DISABLE
1 change: 0 additions & 1 deletion src/request_rate_manager.h
@@ -161,7 +161,6 @@ class RequestRateManager : public LoadManager {
size_t DetermineNumThreads();

std::shared_ptr<std::chrono::nanoseconds> gen_duration_;
- std::vector<float> schedule_;
Distribution request_distribution_;
std::chrono::steady_clock::time_point start_time_;
bool execute_;
