Skip to content

Commit d8266a9

Browse files
[NPU] Plugin batch - set performance_mode to THROUGHPUT (#32669)
### Details:
- If the model can be processed by the `PLUGIN` batch, `performance_mode` should be set to `THROUGHPUT`.
- This PR will be merged once validation is completed.
1 parent 7ebfc5c commit d8266a9

File tree

2 files changed

+23
-4
lines changed

2 files changed

+23
-4
lines changed

src/plugins/intel_npu/src/plugin/src/plugin.cpp

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -701,14 +701,32 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
701701

702702
std::shared_ptr<intel_npu::IGraph> graph;
703703

704+
auto compileWithConfig = [&](const auto& modelToCompile, const auto& config) {
705+
if (!localConfig.get<WEIGHTLESS_BLOB>()) {
706+
return compiler->compile(modelToCompile, config);
707+
} else {
708+
check_weightless_cache_attribute_occurrence(model);
709+
return compiler->compileWS(modelToCompile, config);
710+
}
711+
};
712+
704713
try {
705714
_logger.debug("performing compile");
706715

707-
if (!localConfig.get<WEIGHTLESS_BLOB>()) {
708-
graph = compiler->compile(successfullyDebatched ? batchedModel : model->clone(), localConfig);
716+
// Determine which model to use
717+
auto modelToCompile = successfullyDebatched ? batchedModel : model->clone();
718+
719+
if (successfullyDebatched && localConfig.get<PERFORMANCE_HINT>() == ov::hint::PerformanceMode::LATENCY) {
720+
_logger.info("Override performance mode to THROUGHPUT for compilation");
721+
722+
auto modifiedConfig = localConfig; // Copy only when needed
723+
std::stringstream strStream;
724+
strStream << ov::hint::PerformanceMode::THROUGHPUT;
725+
modifiedConfig.update({{ov::hint::performance_mode.name(), strStream.str()}});
726+
727+
graph = compileWithConfig(modelToCompile, modifiedConfig);
709728
} else {
710-
check_weightless_cache_attribute_occurrence(model);
711-
graph = compiler->compileWS(successfullyDebatched ? batchedModel : model->clone(), localConfig);
729+
graph = compileWithConfig(modelToCompile, localConfig); // No copy
712730
}
713731
} catch (const std::exception& ex) {
714732
OPENVINO_THROW(ex.what());

src/plugins/intel_npu/src/plugin/src/transformations.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ std::tuple<std::shared_ptr<ov::Model>, bool> handlePluginBatching(
244244
logger.info("Couldn't validate and reshape the model. Batching will be handled by compiler. Error: %s",
245245
ex.what());
246246
}
247+
247248
return {reshapedModel, successfullyDebatched};
248249
}
249250

0 commit comments

Comments
 (0)