Skip to content

Commit

Permalink
chore: setting for test
Browse files Browse the repository at this point in the history
  • Loading branch information
keehyuna committed Nov 12, 2024
1 parent 65ea0b1 commit d0ef3cd
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 11 deletions.
4 changes: 0 additions & 4 deletions core/runtime/TRTEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,10 +296,6 @@ int64_t TRTEngine::get_automatic_device_memory_budget() {
return cuda_engine->getWeightStreamingAutomaticBudget();
}

// Toggle reuse of pre-allocated output tensors for this engine.
// When enabled, the runtime skips per-call output allocation and writes into
// buffers allocated ahead of time. (NOTE(review): removed in this commit in
// favor of exposing `use_pre_allocated_outputs` directly via def_readwrite.)
void TRTEngine::set_pre_allocated_outputs(bool enable) {
  use_pre_allocated_outputs = enable;
}

std::string TRTEngine::to_str() const {
// clang-format off
std::stringstream ss;
Expand Down
4 changes: 0 additions & 4 deletions core/runtime/execute_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include "torch/csrc/jit/runtime/custom_operator.h"
#include "torch/torch.h"

#include <ATen/record_function.h>
#include "core/runtime/TRTEngineProfiler.h"
#include "core/runtime/runtime.h"
#include "core/util/prelude.h"
Expand Down Expand Up @@ -200,7 +199,6 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr

{ // Input Setup
std::unique_ptr<torch::autograd::profiler::RecordProfile> input_profiler_guard;
RECORD_FUNCTION("process input", std::vector<c10::IValue>());
if (compiled_engine->profile_execution) {
input_profiler_guard =
std::make_unique<torch::autograd::profiler::RecordProfile>(compiled_engine->input_profile_path);
Expand Down Expand Up @@ -282,7 +280,6 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr

{ // Output Setup
std::unique_ptr<torch::autograd::profiler::RecordProfile> output_profiler_guard;
RECORD_FUNCTION("process output", std::vector<c10::IValue>());
if (compiled_engine->profile_execution) {
output_profiler_guard =
std::make_unique<torch::autograd::profiler::RecordProfile>(compiled_engine->output_profile_path);
Expand Down Expand Up @@ -331,7 +328,6 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
std::unique_lock<std::mutex> lock(compiled_engine->mu);

{ // Engine Execution (execute on engine stream)
RECORD_FUNCTION("Trt runtime", std::vector<c10::IValue>());
c10::cuda::CUDAStreamGuard stream_guard(compiled_engine->engine_stream);

std::unique_ptr<torch::autograd::profiler::RecordProfile> enqueue_profiler_guard;
Expand Down
2 changes: 1 addition & 1 deletion core/runtime/register_jit_hooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ static auto TORCHTRT_UNUSED TRTEngineTSRegistrtion =
.def("dump_engine_layer_info_to_file", &TRTEngine::dump_engine_layer_info_to_file)
.def("dump_engine_layer_info", &TRTEngine::dump_engine_layer_info)
.def("get_engine_layer_info", &TRTEngine::get_engine_layer_info)
.def("set_pre_allocated_outputs", &TRTEngine::set_pre_allocated_outputs)
.def_readwrite("use_pre_allocated_outputs", &TRTEngine::use_pre_allocated_outputs)
.def_property(
"device_memory_budget",
&TRTEngine::get_device_memory_budget,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def __init__(
self.target_platform = Platform.current_platform()
self.cudagraphs_enabled = False
self.pre_allocated_outputs: List[torch.Tensor] = []
self.use_pre_allocated_outputs = False
self.use_pre_allocated_outputs = True

if self.serialized_engine is not None and not self.settings.lazy_engine_init:
self.setup_engine()
Expand Down Expand Up @@ -248,6 +248,9 @@ def create_output_tensors(self) -> List[torch.Tensor]:
outputs.append(output)
return outputs

def set_output_opt(self, enable: bool) -> None:
    """Enable or disable reuse of pre-allocated output tensors.

    Args:
        enable: When ``True``, ``forward`` reuses ``self.pre_allocated_outputs``
            instead of allocating fresh output tensors on each call.
            TODO(review): confirm forward() honors this flag — the consuming
            code path is outside this diff.
    """
    self.use_pre_allocated_outputs = enable

def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, ...]:
# Ensure inputs are available in all scopes and cast symbolic integers to Tensors
contiguous_inputs: List[torch.Tensor] = [
Expand Down
5 changes: 4 additions & 1 deletion py/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def setup_engine(self) -> None:
if self.engine is not None:
return
self.engine = torch.classes.tensorrt.Engine(self._pack_engine_info())
self.engine.set_pre_allocated_outputs(True)
self.set_output_opt(True)

def encode_metadata(self, metadata: Any) -> str:
metadata = copy.deepcopy(metadata)
Expand Down Expand Up @@ -268,6 +268,9 @@ def set_extra_state(self, state: SerializedTorchTensorRTModuleFmt) -> None:
self.input_binding_names = state[2]
self.output_binding_names = state[3]

def set_output_opt(self, enable: bool) -> None:
    """Enable or disable pre-allocated outputs on the underlying C++ engine.

    Sets the ``use_pre_allocated_outputs`` attribute exposed by the
    ``torch.classes.tensorrt.Engine`` binding (``def_readwrite`` on the
    C++ side), mirroring the same-named method on PythonTorchTensorRTModule.

    Args:
        enable: ``True`` to have the runtime reuse pre-allocated output
            buffers; ``False`` to allocate outputs per call.

    NOTE(review): assumes ``self.engine`` is already set up — calling this
    before ``setup_engine()`` would raise AttributeError on a ``None`` engine;
    verify call ordering at the call sites.
    """
    self.engine.use_pre_allocated_outputs = enable

def forward(self, *inputs: Any) -> torch.Tensor | Tuple[torch.Tensor, ...]:
"""Implementation of the forward pass for a TensorRT engine
Expand Down

0 comments on commit d0ef3cd

Please sign in to comment.