Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PA: Fixed scheduler manager #88

Merged
merged 16 commits into from
Nov 7, 2024
2 changes: 2 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ set(
data_loader.cc
concurrency_manager.cc
request_rate_manager.cc
custom_request_schedule_manager.cc
load_worker.cc
concurrency_worker.cc
request_rate_worker.cc
Expand Down Expand Up @@ -160,6 +161,7 @@ set(
data_loader.h
concurrency_manager.h
request_rate_manager.h
custom_request_schedule_manager.h
custom_load_manager.h
iworker.h
load_worker.h
Expand Down
29 changes: 24 additions & 5 deletions src/command_line_parser.cc
matthewkotila marked this conversation as resolved.
Show resolved Hide resolved
matthewkotila marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ SplitString(const std::string& str, const std::string& delimiter = ":")
std::vector<std::string> substrs;
size_t pos = 0;
while (pos != std::string::npos) {
size_t colon_pos = str.find(":", pos);
size_t colon_pos = str.find(delimiter, pos);
substrs.push_back(str.substr(pos, colon_pos - pos));
if (colon_pos == std::string::npos) {
pos = colon_pos;
Expand Down Expand Up @@ -908,6 +908,7 @@ CLParser::ParseCommandLine(int argc, char** argv)
{"endpoint", required_argument, 0, long_option_idx_base + 61},
{"request-count", required_argument, 0, long_option_idx_base + 62},
{"warmup-request-count", required_argument, 0, long_option_idx_base + 63},
{"schedule", required_argument, 0, long_option_idx_base + 64},
{0, 0, 0, 0}};

// Parse commandline...
Expand Down Expand Up @@ -1647,7 +1648,9 @@ CLParser::ParseCommandLine(int argc, char** argv)
if (std::stoi(optarg) < 0) {
Usage("Failed to parse --request-count. The value must be > 0.");
}
params_->request_count = std::stoi(optarg);
if (params_->request_count == 0) {
params_->request_count = std::stoi(optarg);
}
break;
}
case long_option_idx_base + 63: {
Expand All @@ -1659,6 +1662,17 @@ CLParser::ParseCommandLine(int argc, char** argv)
params_->warmup_request_count = std::stoi(optarg);
break;
}
case long_option_idx_base + 64: {
std::vector<float> schedule;
std::string arg = optarg;
std::vector<std::string> float_strings = SplitString(optarg, ",");
for (const std::string& str : float_strings) {
schedule.push_back(std::stof(str));
}
params_->schedule = schedule;
params_->request_count = schedule.size();
break;
}
case 'v':
params_->extra_verbose = params_->verbose;
params_->verbose = true;
Expand Down Expand Up @@ -1977,9 +1991,13 @@ CLParser::VerifyOptions()
Usage(
"perf_analyzer supports only grpc protocol for TensorFlow Serving.");
} else if (params_->streaming) {
Usage("perf_analyzer does not support streaming for TensorFlow Serving.");
Usage(
"perf_analyzer does not support streaming for TensorFlow "
"Serving.");
} else if (params_->async) {
Usage("perf_analyzer does not support async API for TensorFlow Serving.");
Usage(
"perf_analyzer does not support async API for TensorFlow "
"Serving.");
} else if (!params_->using_batch_size) {
params_->batch_size = 0;
}
Expand Down Expand Up @@ -2008,7 +2026,8 @@ CLParser::VerifyOptions()
if (params_->async && params_->streaming &&
params_->shared_memory_type != SharedMemoryType::NO_SHARED_MEMORY) {
Usage(
"Cannot use --shared-memory=system or --shared-memory=cuda with "
"Cannot use --shared-memory=system or --shared-memory=cuda "
"with "
"--service-kind=triton_c_api and --async and --streaming.");
}

Expand Down
2 changes: 2 additions & 0 deletions src/command_line_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ struct PerfAnalyzerParameters {
Range<uint64_t> periodic_concurrency_range{1, 1, 1};
uint64_t request_period{10};
size_t warmup_request_count{0};

std::vector<float> schedule{};
};

using PAParamsPtr = std::shared_ptr<PerfAnalyzerParameters>;
Expand Down
121 changes: 121 additions & 0 deletions src/custom_request_schedule_manager.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "custom_request_schedule_manager.h"

namespace triton::perfanalyzer {

cb::Error
CustomRequestScheduleManager::Create(
    const pa::PAParamsPtr& params, const std::shared_ptr<ModelParser>& parser,
    const std::shared_ptr<cb::ClientBackendFactory>& factory,
    std::unique_ptr<LoadManager>* manager)
{
  // NOTE: std::make_unique cannot be used here because the constructor is
  // protected and make_unique constructs from outside class scope.
  manager->reset(new CustomRequestScheduleManager(params, parser, factory));
  return cb::Error::Success;
}

// Constructs the manager by forwarding the relevant fields of `params` to the
// RequestRateManager base. Distribution::CUSTOM selects the user-defined
// schedule path in the base class; the raw schedule timestamps are kept in
// schedule_ for later scaling in GenerateSchedule().
CustomRequestScheduleManager::CustomRequestScheduleManager(
    const pa::PAParamsPtr& params, const std::shared_ptr<ModelParser>& parser,
    const std::shared_ptr<cb::ClientBackendFactory>& factory)
    : RequestRateManager(
          params->async, params->streaming, Distribution::CUSTOM,
          params->batch_size, params->measurement_window_ms, params->max_trials,
          params->max_threads, params->num_of_sequences,
          params->shared_memory_type, params->output_shm_size,
          params->serial_sequences, parser, factory,
          params->request_parameters),
      schedule_(params->schedule)
{
}

cb::Error
CustomRequestScheduleManager::PerformWarmup(
    double request_rate, size_t warmup_request_count)
{
  // A warmup count of zero means warmup is disabled; otherwise issue the
  // warmup requests at the given rate and wait for them to drain.
  if (warmup_request_count > 0) {
    RETURN_IF_ERROR(ChangeRequestRate(request_rate, warmup_request_count));
    WaitForWarmupAndCleanup();
  }
  return cb::Error::Success;
}

// Re-arms the workers with a schedule derived from the stored user schedule.
// \param request_rate The rate used to scale the stored schedule timestamps.
// \param request_count The number of requests to generate when profiling.
// \return cb::Error object indicating success or failure.
cb::Error
CustomRequestScheduleManager::ChangeRequestRate(
    const double request_rate, const size_t request_count)
{
  // Workers must be paused before their thread configuration and schedules
  // are swapped out; order of these calls matters.
  PauseWorkers();
  ConfigureThreads(request_count);
  GenerateSchedule(request_rate, schedule_);
  ResumeWorkers();

  return cb::Error::Success;
}

// Generates and distributes per-worker schedules from the user schedule.
// Each user-provided timestamp is scaled (divided) by `request_rate` before
// being handed to the workers.
// \param request_rate The request rate used to scale the schedule.
// \param schedule The vector containing the user-provided schedule timestamps.
void
CustomRequestScheduleManager::GenerateSchedule(
    const double request_rate, const std::vector<float>& schedule)
{
  std::vector<float> scaled_schedule;
  scaled_schedule.reserve(schedule.size());
  for (const auto& value : schedule) {
    scaled_schedule.push_back(value / static_cast<float>(request_rate));
  }
  // BUG FIX: previously the UNSCALED `schedule` was passed here, so the
  // scaled values computed above were silently discarded and request_rate
  // had no effect on the generated worker schedules.
  auto worker_schedules = CreateWorkerSchedules(scaled_schedule);
  GiveSchedulesToWorkers(worker_schedules);
}

std::vector<RateSchedulePtr_t>
CustomRequestScheduleManager::CreateWorkerSchedules(
const std::vector<float>& schedule)
{
std::vector<RateSchedulePtr_t> worker_schedules =
CreateEmptyWorkerSchedules();
std::vector<size_t> thread_ids{CalculateThreadIds()};
std::chrono::nanoseconds next_timestamp(0);
size_t thread_id_index = 0;
size_t worker_index = 0;

for (const float& val : schedule) {
next_timestamp = std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::duration<float>(val));
worker_index = thread_ids[thread_id_index];
thread_id_index = ++thread_id_index % thread_ids.size();
worker_schedules[worker_index]->intervals.emplace_back(next_timestamp);
}
SetScheduleDurations(worker_schedules);

return worker_schedules;
}

} // namespace triton::perfanalyzer
109 changes: 109 additions & 0 deletions src/custom_request_schedule_manager.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include "command_line_parser.h"
#include "load_manager.h"
#include "request_rate_manager.h"

namespace triton::perfanalyzer {

//==============================================================================
/// CustomRequestScheduleManager is a helper class to send inference requests to
/// inference server in accordance with the schedule set by the user.
///
/// Detail:
/// An instance of this load manager will be created at the beginning of the
/// perf analyzer and it will be used to schedule to send requests at that
/// particular second defined by the user. The particular seconds at which a
/// request should be sent can be set by the user using the `schedule` option.
/// For example, if the `schedule` is set to 1,2,4,5,6.5,
/// CustomRequestScheduleManager sends request at 1st second, 2nd second, 4th
/// second and so on.
///

class CustomRequestScheduleManager : public RequestRateManager {
public:
~CustomRequestScheduleManager() = default;

/// Creates an object of CustomRequestScheduleManager
/// \param params A PAParamsPtr (std::shared_ptr<PerfAnalyzerParameters>) that
/// holds configuration parameters to create CustomRequestScheduleManager
/// object
///
static cb::Error Create(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay I... am conflicted here, but here's my feedback on what I see.

This sort of API is... not great. It's extremely error prone, due to the sheer number of unnamed parameters that are mixed between input and output. Yet, it matches the current existing API from RequestRateManager, and you're just following the existing pattern, so that's not really on you, and the rest of what I'm typing here isn't directed at you, just my general statement on the state of the codebase in general.

To explain the problem: what I see here is full of what's now considered bad programming practice, and we should start considering cleaning this code. I'm just not sure if we should start now with this PR, or have a more thorough clean later, as trying to do this now may (1) take a long time for you and (2) look akward if it's not propagated throughout the rest of the codebase. But also, working incrementally towards having a better codebase is a good path forward.

  • We need to embrace named argument paradigms. C++20 allows for better syntax, but even in normal C++03, we can do something along the lines of a struct which contains the arguments to pass over. This by far is the most problematic part of this call here. This function call has way too many arguments, and it's excessively error-prone. We should instead have structured arguments to then unpack and use later. If only one of the elements from my feedback is actioned upon, it should be this one.
  • Random bool values in the middle of a function call is also error prone. A bit less so if we have structured arguments. If we do not have structured argument for bools, we should generally speaking replace them with enums to be more expressive of what exactly we're doing instead of just having a string of true and false in the middle of a function call.
  • We'd need to use something like std::span instead of passing references to vectors. Unfortunately, this is also something which only exists in C++20. We could also use Abseil here, since we have it in the dependency tree anyway transitively from gRPC.
  • The usage of shared_ptr is usually a codesmell. I need to investigate more what's going on here, but I'd really like to fully ban shared_ptr altogether from the codebase if I could. Again, depending on what's exactly being done here, but it generally speaking is a telltale of ownership issues. The fact it's passing const references to shared pointers is even more indicative of an exising problem.
  • Passing a pointer in the middle of the list of arguments for the output value is also a red flag. We should refactor the return value system we have to have a "StatusOr" paradigm, which is basically an std::variant between error codes, and return types, allowing us to either return an error, or an actual value. Abseil also provides this, but I'd rather we come up with something which matches our usage case properly, as Abseil's version is way too opiniated for our usage.
  • The mix'n'match of signed (batch_size) and unsigned (max_threads and num_of_sequences) values for size types is odd. In general we should use unsigned for anything size-related.
  • Durations, such as this measurement_window_ms, should be better typed to incorporate the time unit, ideally using std::chrono.

Now, looking at it a bit more down the code, in perf_analyzer.cc, we have a structure already with all the parameters, called params_, which we unpack into all of these arguments... Instead, we could just easily pass that structure around, and let the callees decide what to pick from them. As in, my first point in this feedback is already done, basically, one would just need to haul the structure up and down instead of unpacking / repacking it with so many arguments.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To clarify a bit my stance: I am not against smart pointers in general, just that shared_ptr are usually unique_ptr in disguise. They are non-committal about the actual ownership of the object, whereas unique_ptr has a clearer definition of it.

Copy link
Contributor

@lkomali lkomali Nov 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The shared_ptr is for client backend factory and I have no idea how much of an effort it is to modify everything to unique_ptr. I can create a ticket for that effort.
Coming to the constructor call, I agree with all your points. As far as I understand, your suggestion is to pass params object instead of the individual arguments. It makes sense to me. Again, I'm not sure if I'm going to break something by modifying the API. Let me know if I should add a ticket to investigate and modify the all the related APIs because as you said, it might look awkward if we modify for some of them and it's not propagated until some time after.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, let's just try and plumb the parameters object in this one instance, and keep in mind we should do this for newer code. If we have time, we should go back and clean older API incrementally. You're right: let's not break existing code right now, but let's avoid introducing new code which follows this older pattern.

For the unique_ptr vs shared_ptr, this was more of a general remark that we should keep in mind when adding new shared_ptr in the codebase. At some point we should review the existing usage and clean it up, but not in this one PR.

const pa::PAParamsPtr& params, const std::shared_ptr<ModelParser>& parser,
const std::shared_ptr<cb::ClientBackendFactory>& factory,
std::unique_ptr<LoadManager>* manager);

/// Performs warmup for benchmarking by sending a fixed number of requests
/// according to the specified request rate
/// \param request_rate The rate at which requests must be issued to the
/// server \param warmup_request_count The number of warmup requests to send
/// \return cb::Error object indicating success or failure
cb::Error PerformWarmup(
double request_rate, size_t warmup_request_count) override;

/// Adjusts the rate of issuing requests to be the same as 'request_rate'
/// \param request_rate The rate at which requests must be issued to the
/// server \param request_count The number of requests to generate when
/// profiling \return cb::Error object indicating success or failure
cb::Error ChangeRequestRate(
const double request_rate, const size_t request_count) override;


protected:
/// Constructor for CustomRequestScheduleManager
///
/// Initializes a CustomRequestScheduleManager instance using a PAParamsPtr
/// object that contains all necessary parameters for request scheduling.
///
/// \param params A PAParamsPtr (std::shared_ptr<PerfAnalyzerParameters>) that
/// holds configuration parameters to create CustomRequestScheduleManager
/// object
///
CustomRequestScheduleManager(
const pa::PAParamsPtr& params, const std::shared_ptr<ModelParser>& parser,
const std::shared_ptr<cb::ClientBackendFactory>& factory);

/// Generates and updates the request schedule as per the given request rate
/// and schedule \param request_rate The request rate to use for new schedule
/// \param schedule The vector containing the schedule for requests
void GenerateSchedule(
const double request_rate, const std::vector<float>& schedule);

/// Creates worker schedules based on the provided schedule
/// \param duration The maximum duration for the schedule
/// \param schedule The vector containing the schedule for requests
/// \return A vector of RateSchedulePtr_t representing the worker schedules
std::vector<RateSchedulePtr_t> CreateWorkerSchedules(
const std::vector<float>& schedule);

/// The vector containing the schedule for requests
std::vector<float> schedule_;
};

} // namespace triton::perfanalyzer
5 changes: 3 additions & 2 deletions src/inference_profiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
#include "constants.h"
#include "doctest.h"

namespace triton { namespace perfanalyzer {
namespace triton::perfanalyzer {
cb::Error
ReportPrometheusMetrics(const Metrics& metrics)
{
Expand Down Expand Up @@ -622,6 +622,7 @@ InferenceProfiler::Profile(
is_stable = false;
meets_threshold = true;


RETURN_IF_ERROR(dynamic_cast<RequestRateManager*>(manager_.get())
->PerformWarmup(request_rate, warmup_request_count));
RETURN_IF_ERROR(dynamic_cast<RequestRateManager*>(manager_.get())
Expand Down Expand Up @@ -1872,4 +1873,4 @@ InferenceProfiler::MergeMetrics(
return cb::Error::Success;
}

}} // namespace triton::perfanalyzer
} // namespace triton::perfanalyzer
6 changes: 4 additions & 2 deletions src/inference_profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "concurrency_manager.h"
#include "constants.h"
#include "custom_load_manager.h"
#include "custom_request_schedule_manager.h"
#include "metrics.h"
#include "metrics_manager.h"
#include "model_parser.h"
Expand All @@ -47,7 +48,7 @@
#include "profile_data_collector.h"
#include "request_rate_manager.h"

namespace triton { namespace perfanalyzer {
namespace triton::perfanalyzer {

#ifndef DOCTEST_CONFIG_DISABLE
class NaggyMockInferenceProfiler;
Expand Down Expand Up @@ -443,6 +444,7 @@ class InferenceProfiler {
std::vector<PerfStatus>& perf_statuses, bool& meets_threshold,
bool& is_stable);


/// A helper function for profiling functions.
/// \param status_summary Returns the summary of the measurement.
/// \param request_count The number of requests to generate when profiling. If
Expand Down Expand Up @@ -829,4 +831,4 @@ class InferenceProfiler {
#endif
};

}} // namespace triton::perfanalyzer
} // namespace triton::perfanalyzer
Loading
Loading