[onert/odc] Auto-compilation. Add nnfw_run_with_auto_compilation method. (#14412)

This PR for `odc:auto-compilation` introduces the `nnfw_run_with_auto_compilation`, `nnfw_odc_delete_minmax_file`, and `nnfw_set_odc_param_minmax_records_count` methods to the NNFW API.

ONE-DCO-1.0-Signed-off-by: Evgenii Maltsev [email protected]
Torrero authored Dec 6, 2024
1 parent 8f0db8a commit 112f55c
Showing 4 changed files with 384 additions and 0 deletions.
52 changes: 52 additions & 0 deletions runtime/onert/api/nnfw/include/nnfw_experimental.h
@@ -558,6 +558,58 @@ NNFW_STATUS nnfw_set_codegen_model_path(nnfw_session *session, const char *path)
*/
NNFW_STATUS nnfw_codegen(nnfw_session *session, const char *target, NNFW_CODEGEN_PREF pref);

/**
* @brief Set the MinMax records count for auto compilation mode with the on-device compiler
*
* This function sets the MinMax records count used for quantization in auto compilation mode.
* To enable auto compilation mode, use {@link nnfw_run_with_auto_compilation}.
*
* @param[in] session nnfw_session
* @param[in] minmax_records_count MinMax records count threshold
* @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR
*/
NNFW_STATUS nnfw_set_odc_param_minmax_records_count(nnfw_session *session,
int minmax_records_count);

/**
* @brief Delete the MinMax file used by the on-device compiler
*
* @param[in] session nnfw_session
* @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR
*/
NNFW_STATUS nnfw_odc_delete_minmax_file(nnfw_session *session);

/**
* @brief Run inference with auto compilation
*
* This function runs inference while transparently replacing the original model
* with a quantized or compiled model.
* During inference, MinMax statistics are collected; once enough records have been
* gathered, quantization is performed.
* If quantization succeeds, code generation for the target backend is attempted;
* otherwise the original float model keeps running.
* If compilation succeeds, the compiled model is run; otherwise the quantized model is run.
* The on-device compiler (ODC) provides the quantization and compilation functionality.
* This function should be called after the model is loaded by {@link nnfw_load_model_from_file},
* the session is prepared for inference by {@link nnfw_prepare}, and the input and output
* buffers are set by {@link nnfw_set_input} and {@link nnfw_set_output}.
*
* Additionally, the following parameters should be set up:
* 1. Quantization type {@link nnfw_set_quantization_type}
* 2. Quantized model path {@link nnfw_set_quantized_model_path}
* 3. MinMax records threshold for quantization {@link nnfw_set_odc_param_minmax_records_count}
* 4. Compiled model path {@link nnfw_set_codegen_model_path}
* The file with collected MinMax statistics can be removed by {@link nnfw_odc_delete_minmax_file}.
*
* @param[in] session nnfw_session
* @param[in] target Target backend to generate code for, as in {@link nnfw_codegen}
* @param[in] pref @c NNFW_CODEGEN_PREF
* @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR
*/
NNFW_STATUS nnfw_run_with_auto_compilation(nnfw_session *session, const char *target,
NNFW_CODEGEN_PREF pref);

//////////////////////////////////////////////
// APIs for configuration
//////////////////////////////////////////////
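For orientation, here is a minimal sketch of the call sequence this header describes. The model paths, buffer sizes, record count, and the "npu-gen" target name are illustrative assumptions, as are the NNFW_QUANTIZE_TYPE_U8_ASYM and NNFW_CODEGEN_PREF_DEFAULT enum values; error handling is omitted for brevity.

#include <nnfw.h>
#include <nnfw_experimental.h>

int main(void)
{
  nnfw_session *session = NULL;
  nnfw_create_session(&session);
  nnfw_load_model_from_file(session, "model.circle"); /* hypothetical path */
  nnfw_prepare(session);

  /* Parameters required before nnfw_run_with_auto_compilation */
  nnfw_set_quantization_type(session, NNFW_QUANTIZE_TYPE_U8_ASYM); /* assumed value */
  nnfw_set_quantized_model_path(session, "model.q8.circle");
  nnfw_set_odc_param_minmax_records_count(session, 10);
  nnfw_set_codegen_model_path(session, "model_codegen.bin");

  /* I/O buffers, exactly as for a plain nnfw_run() */
  static float input[1 * 224 * 224 * 3];
  static float output[1000];
  nnfw_set_input(session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input));
  nnfw_set_output(session, 0, NNFW_TYPE_TENSOR_FLOAT32, output, sizeof(output));

  /* The target string must end with "-gen"; see the parameter check in
     nnfw_api_internal.cc below */
  nnfw_run_with_auto_compilation(session, "npu-gen", NNFW_CODEGEN_PREF_DEFAULT);

  nnfw_close_session(session);
  return 0;
}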
19 changes: 19 additions & 0 deletions runtime/onert/api/nnfw/src/nnfw_api.cc
@@ -508,6 +508,25 @@ NNFW_STATUS nnfw_codegen(nnfw_session *session, const char *target, NNFW_CODEGEN
return session->codegen(target, pref);
}

NNFW_STATUS nnfw_set_odc_param_minmax_records_count(nnfw_session *session, int minmax_records_count)
{
NNFW_RETURN_ERROR_IF_NULL(session);
return session->set_odc_param_minmax_records_count(minmax_records_count);
}

NNFW_STATUS nnfw_odc_delete_minmax_file(nnfw_session *session)
{
NNFW_RETURN_ERROR_IF_NULL(session);
return session->delete_odc_minmax_file();
}

NNFW_STATUS nnfw_run_with_auto_compilation(nnfw_session *session, const char *target,
NNFW_CODEGEN_PREF pref)
{
NNFW_RETURN_ERROR_IF_NULL(session);
return session->run_with_auto_compilation(target, pref);
}

// Configuration

NNFW_STATUS nnfw_set_prepare_config(nnfw_session *session, const NNFW_PREPARE_CONFIG key,
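All three wrappers follow the existing pattern in nnfw_api.cc: null-check the session, then delegate to the corresponding nnfw_session member function. A plausible expansion of the NNFW_RETURN_ERROR_IF_NULL guard, shown here as an assumption for illustration (the real macro is defined elsewhere in the runtime), would be:

/* Return early with the dedicated status code when the session pointer is null */
#define NNFW_RETURN_ERROR_IF_NULL(p)      \
  do                                      \
  {                                       \
    if ((p) == NULL)                      \
      return NNFW_STATUS_UNEXPECTED_NULL; \
  } while (0)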
297 changes: 297 additions & 0 deletions runtime/onert/api/nnfw/src/nnfw_api_internal.cc
@@ -2028,3 +2028,300 @@ NNFW_STATUS nnfw_session::reset_execute_config()

return NNFW_STATUS_NO_ERROR;
}

NNFW_STATUS nnfw_session::set_odc_param_minmax_records_count(int minmax_records_count)
{
if (isStateInitialized() || isStateRunning())
{
std::cerr << "invalid state" << std::endl;
return NNFW_STATUS_INVALID_STATE;
}

if (_quant_manager->setMinMaxRecordsThreshold(minmax_records_count))
return NNFW_STATUS_NO_ERROR;
else
return NNFW_STATUS_ERROR;
}

NNFW_STATUS nnfw_session::delete_odc_minmax_file()
{
if (isStateRunning())
{
std::cerr << "invalid state" << std::endl;
return NNFW_STATUS_INVALID_STATE;
}

if (_quant_manager->deleteMinMaxFile())
return NNFW_STATUS_NO_ERROR;
else
return NNFW_STATUS_ERROR;
}

// run with auto compilation
NNFW_STATUS nnfw_session::run_with_auto_compilation(const char *target, NNFW_CODEGEN_PREF pref)
{

if (!isStatePreparedOrFinishedRun())
{
std::cerr << "Error during nnfw_session::run_with_auto_compilation : "
<< "run should be after preparation" << std::endl;
return NNFW_STATUS_INVALID_STATE;
}

// Check quantization and code-generation parameters
// (target must be a non-null string ending with "-gen")
std::string target_str = target != nullptr ? target : "";
if (_quant_manager->exportModelPath().empty() || _codegen_manager->exportModelPath().empty() ||
target_str.size() < 4 || target_str.substr(target_str.size() - 4) != "-gen")
{
std::cerr << "Error during nnfw_session::run_with_auto_compilation : "
<< "quantization and code generation parameters should be set" << std::endl;
return NNFW_STATUS_INVALID_STATE;
}

// ODC: auto compilation with a hidden switching mechanism
// Check whether the model is already quantized or compiled
std::ifstream file_quantized_model(_quant_manager->exportModelPath());
std::ifstream file_compiled_model(_codegen_manager->exportModelPath());

if (!file_quantized_model.good() && !file_compiled_model.good())
{
// Run float model and try to quantize it
{
// Save execution options
auto saved_options = _execution->executionOptions();
// turn on minmax recording
_execution->executionOptions().dump_minmax = true;

try
{
_execution->execute();
}
catch (const onert::InsufficientBufferSizeException &e)
{
// Currently insufficient buffer always means output buffer.
std::cerr << "Error during nnfw_session::run_with_auto_compilation : " << e.what()
<< std::endl;
return NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE;
}
catch (const std::exception &e)
{
std::cerr << "Error during nnfw_session::run_with_auto_compilation : " << e.what()
<< std::endl;
return NNFW_STATUS_ERROR;
}

_state = State::FINISHED_RUN;

// restore min_max option to user defined state
_execution->executionOptions().dump_minmax = saved_options.dump_minmax;

// if enough statistics are collected, then run the quantization
if (_quant_manager->readyForQuantize())
{
try
{
if (isStateInitialized() || isStateRunning())
{
std::cerr << "invalid state" << std::endl;
return NNFW_STATUS_INVALID_STATE;
}

auto result = _quant_manager->quantize(_model_path);
if (!result)
return NNFW_STATUS_INVALID_STATE;

// remove minmax file
result = _quant_manager->deleteMinMaxFile();
if (!result)
return NNFW_STATUS_INVALID_STATE;
}
catch (const std::exception &e)
{
std::cerr
<< "Error during nnfw_session::run_with_auto_compilation in quantize operation: "
<< e.what() << std::endl;
return NNFW_STATUS_ERROR;
}
}
}
}
else
{
// run compiled or quantized model
NNFW_STATUS status;

// turn off minmax recording
_execution->executionOptions().dump_minmax = false;

// save initial buffers if quantized model or compiled model is not loaded
if (_autoCompilationState == nnfw_session::AutoCompilationState::INITIAL_STATE)
{
auto dotidx = _codegen_manager->exportModelPath().rfind('.');
if (dotidx == std::string::npos)
{
std::cerr << "Error during nnfw_session::run_with_auto_compilation : Invalid compiled "
"model path. Please use a "
"path that includes the extension."
<< std::endl;
return NNFW_STATUS_ERROR;
}

std::string compiled_model_type =
_codegen_manager->exportModelPath().substr(dotidx + 1); // + 1 to exclude dot

dotidx = _quant_manager->exportModelPath().rfind('.');
if (dotidx == std::string::npos)
{
std::cerr << "Error during nnfw_session::run_with_auto_compilation : Invalid quantized "
"model path. Please use a "
"path that includes the extension."
<< std::endl;
return NNFW_STATUS_ERROR;
}
std::string quantized_model_type =
_quant_manager->exportModelPath().substr(dotidx + 1); // + 1 to exclude dot

// Save initial (float) input and output buffers
auto input_size = _compiler_artifact->_executors->inputSize();
auto output_size = _compiler_artifact->_executors->outputSize();

std::vector<const void *> _input_buffers;
std::vector<void *> _output_buffers;

// Save Inputs buffers
for (size_t input_index = 0; input_index < input_size; input_index++)
{
auto io_input_index = onert::ir::IOIndex(input_index);
auto input_buffer = _execution->getInputBuffer(io_input_index);

_input_buffers.push_back(input_buffer);
}

// Save Outputs buffers
for (size_t output_index = 0; output_index < output_size; output_index++)
{
auto io_output_index = onert::ir::IOIndex(output_index);
auto output_buffer = _execution->getOutputBuffer(io_output_index);

_output_buffers.push_back(output_buffer);
}

// Save execution options
auto saved_options = _execution->executionOptions();

// if there is compiled model - try to load it
if (file_compiled_model.good())
{
// load compiled model
status = loadModelFile(_codegen_manager->exportModelPath(), compiled_model_type);
if (status == NNFW_STATUS_NO_ERROR)
{
_autoCompilationState = nnfw_session::AutoCompilationState::COMPILED_MODEL_LOADED;
}
}
else // there is no compiled model - try to compile and load it
{
// To avoid code duplication, reuse the existing codegen() function.
// Set up _model_path for it.
// TODO: change this if the codegen function is generalized
_model_path = _quant_manager->exportModelPath();

// try to compile and load compiled model
status = codegen(target, pref);
if (status == NNFW_STATUS_NO_ERROR)
{
_autoCompilationState = nnfw_session::AutoCompilationState::COMPILED_MODEL_LOADED;
// TODO delete quantized model
}
}

// if loading the compiled model failed, try to load the quantized model
if (_autoCompilationState != nnfw_session::AutoCompilationState::COMPILED_MODEL_LOADED)
{
// load quantized model
status = loadModelFile(_quant_manager->exportModelPath(), quantized_model_type);
if (status != NNFW_STATUS_NO_ERROR)
return status;
else
_autoCompilationState = nnfw_session::AutoCompilationState::QUANTIZED_MODEL_LOADED;
}

status = prepare();
if (status != NNFW_STATUS_NO_ERROR)
return status;

// Restore execution options
_execution->executionOptions() = saved_options;

// Restore inputs to the quantized or compiled model
for (uint32_t input_index = 0; input_index < _input_buffers.size(); input_index++)
{
nnfw_tensorinfo ti;
status = input_tensorinfo(input_index, &ti);
if (status != NNFW_STATUS_NO_ERROR)
return status;

ti.dtype = NNFW_TYPE_TENSOR_FLOAT32;
auto input_size_in_bytes = getBufSize(&ti);

status = set_input(input_index, ti.dtype, _input_buffers[input_index], input_size_in_bytes);

if (status != NNFW_STATUS_NO_ERROR)
return status;
}

// Restore outputs to the quantized or compiled model
for (uint32_t output_index = 0; output_index < _output_buffers.size(); output_index++)
{

nnfw_tensorinfo ti;
status = output_tensorinfo(output_index, &ti);
if (status != NNFW_STATUS_NO_ERROR)
return status;

ti.dtype = NNFW_TYPE_TENSOR_FLOAT32;

uint64_t output_size_in_bytes = getBufSize(&ti);

status =
set_output(output_index, ti.dtype, _output_buffers[output_index], output_size_in_bytes);
if (status != NNFW_STATUS_NO_ERROR)
return status;
}
}

// Run the quantized or compiled model
if (!isStatePreparedOrFinishedRun())
{
std::cerr << "Error during nnfw_session::run_with_auto_compilation : "
<< "run should be run after prepare" << std::endl;
return NNFW_STATUS_INVALID_STATE;
}

try
{
_execution->execute();
}
catch (const onert::InsufficientBufferSizeException &e)
{
// Currently insufficient buffer always means output buffer.
std::cerr << "Error during nnfw_session::run_with_auto_compilation : " << e.what()
<< std::endl;
return NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE;
}
catch (const std::exception &e)
{
std::cerr << "Error during nnfw_session::run_with_auto_compilation : " << e.what()
<< std::endl;
return NNFW_STATUS_ERROR;
}

_state = State::FINISHED_RUN;
}

return NNFW_STATUS_NO_ERROR;
}
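To make the hidden switching concrete, here is a sketch of the calibration loop this implementation implies, continuing the setup example shown after the header diff (the loop bound and the fill_next_calibration_sample helper are hypothetical). The first calls run the float model with MinMax dumping enabled; once the configured records count is reached, the model is quantized, then compiled and reloaded, and subsequent calls transparently run the compiled (or quantized) model against the original float buffers.

/* Same API call every iteration; the session decides internally
   which model actually runs. */
for (int i = 0; i < 20; ++i)
{
  fill_next_calibration_sample(input); /* hypothetical helper */
  NNFW_STATUS status =
    nnfw_run_with_auto_compilation(session, "npu-gen", NNFW_CODEGEN_PREF_DEFAULT);
  if (status != NNFW_STATUS_NO_ERROR)
    break; /* e.g. NNFW_STATUS_INVALID_STATE when a required parameter is missing */
}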
