diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
index 0728bed8f9f88b..26dcd9821697b4 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -238,7 +238,7 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr<ov::Model>& m
     std::map<std::string, ov::Any> npuw_llm_props;
     std::map<std::string, ov::Any> other_props;
     split_llm_properties(properties, npuw_llm_props, other_props);
-    
+
     // Remove "NPUW_LLM_PREFILL_CONFIG", "NPUW_LLM_GENERATE_CONFIG" from map,
     // to not pass them into ::intel_npu::Config object, as we don't need to
     // preserve them somewhere.
@@ -280,11 +280,12 @@
     const ::intel_npu::npuw::llm::GenerateHint generate_hint = m_cfg.get<::intel_npu::NPUW_LLM_GENERATE_HINT>();
     LOG_DEBUG("9. Passed GENERATE_HINT: " << std::string(::intel_npu::NPUW_LLM_GENERATE_HINT::toString(generate_hint)));
-    // NB: GENERATE_HINT is only applicable for default generate config!    
+    // NB: GENERATE_HINT is only applicable for default generate config!
     if (generate_config_opt.has_value() && npuw_llm_props.count(ov::intel_npu::npuw::llm::generate_hint.name())) {
         OPENVINO_THROW("GENERATE_HINT is only applicable for default generate config!");
     }
-    auto generate_config = opt_or_default(generate_config_opt, get_default_generate_config(model, npudesc, generate_hint));
+    auto generate_config =
+        opt_or_default(generate_config_opt, get_default_generate_config(model, npudesc, generate_hint));
 
     merge_config_with(prefill_config, properties_copy);
     merge_config_with(generate_config, properties_copy);
@@ -310,7 +311,8 @@ void ov::npuw::LLMCompiledModel::set_property(const ov::AnyMap& properties) {
 
 ov::Any ov::npuw::LLMCompiledModel::get_property(const std::string& name) const {
     OPENVINO_SUPPRESS_DEPRECATED_START
-    if (name == ov::intel_npu::npuw::llm::prefill_config.name() || name == ov::intel_npu::npuw::llm::generate_config.name()) {
+    if (name == ov::intel_npu::npuw::llm::prefill_config.name() ||
+        name == ov::intel_npu::npuw::llm::generate_config.name()) {
         OPENVINO_THROW(name, " is write-only option!");
     }
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp
index d3147669569b65..a9615b381a6c21 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp
@@ -51,7 +51,7 @@ ov::npuw::LLMInferRequest::LLMInferRequest(const std::shared_ptr<ov::npuw::LLMC
 
 void ov::npuw::LLMInferRequest::prepare_for_new_conversation() {
     fill_tensor<int64_t>(m_prefill_request->get_tensor(m_prefill_in_ports.at("input_ids")),
-                        m_npuw_llm_compiled_model->m_cfg.get<::intel_npu::NPUW_LLM_PAD_TOKEN_ID>());
+                         m_npuw_llm_compiled_model->m_cfg.get<::intel_npu::NPUW_LLM_PAD_TOKEN_ID>());
     fill_tensor<int64_t>(m_prefill_request->get_tensor(m_prefill_in_ports.at("attention_mask")), 0);
     fill_tensor<int64_t>(m_prefill_request->get_tensor(m_prefill_in_ports.at("position_ids")), 0);
     fill_tensor<int64_t>(m_kvcache_request->get_tensor(m_kvcache_in_ports.at("attention_mask")), 0);
@@ -114,11 +114,10 @@ void ov::npuw::LLMInferRequest::infer_generate(ov::SoPtr<ov::ITensor> input_ids,
     //     taking into account kvcache dimension.
     fill_tensor<ov::float16>(kvcache_in_tensor, 0);
-    auto prefill_out_slice =
-        make_tensor_slice(prefill_out_tensor,
-                          kvcache_desc.dim,
-                          kvcache_desc.max_prompt_size - kvcache_desc.num_stored_tokens,
-                          kvcache_desc.max_prompt_size);
+    auto prefill_out_slice = make_tensor_slice(prefill_out_tensor,
+                                               kvcache_desc.dim,
+                                               kvcache_desc.max_prompt_size - kvcache_desc.num_stored_tokens,
+                                               kvcache_desc.max_prompt_size);
 
     auto kvcache_in_slice = make_tensor_slice(kvcache_in_tensor, kvcache_desc.dim, 0u, kvcache_desc.num_stored_tokens);
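
Aside on the slice-and-copy pattern the last hunk reflows: the prefill model's KV-cache output is viewed through a slice over its last num_stored_tokens positions along the cache dimension, and that view is copied into the head of the generate model's KV-cache input. Below is a minimal standalone sketch of the same idea, assuming plain ov::Tensor rather than the plugin's ov::SoPtr<ov::ITensor> plumbing; make_tensor_slice and copy_kvcache here are hypothetical stand-ins for the npuw utilities, built on OpenVINO's public ROI-tensor constructor, not the plugin's actual implementation.

    #include <openvino/runtime/tensor.hpp>

    // Hypothetical re-creation of the slicing helper: a zero-copy view over
    // [start, end) along one dimension, using ov::Tensor's ROI constructor.
    static ov::Tensor make_tensor_slice(const ov::Tensor& t, size_t dim, size_t start, size_t end) {
        ov::Coordinate begin(std::vector<size_t>(t.get_shape().size(), 0));
        ov::Coordinate stop(t.get_shape());
        begin[dim] = start;
        stop[dim] = end;
        return ov::Tensor(t, begin, stop);  // a view; no data is copied yet
    }

    // Usage mirroring the diff: move the last num_stored_tokens positions of
    // the prefill KV-cache into the first num_stored_tokens positions of the
    // generate-model KV-cache, along cache dimension `dim`.
    void copy_kvcache(const ov::Tensor& prefill_out, ov::Tensor& kvcache_in,
                      size_t dim, size_t max_prompt_size, size_t num_stored_tokens) {
        auto src = make_tensor_slice(prefill_out, dim, max_prompt_size - num_stored_tokens, max_prompt_size);
        auto dst = make_tensor_slice(kvcache_in, dim, 0, num_stored_tokens);
        src.copy_to(dst);  // the actual data copy happens here
    }

Because both sides are views, the copy touches only num_stored_tokens worth of cache; the preceding fill_tensor zeroes the destination so the untouched tail stays well-defined (which is also what the FIXME comment in the hunk is about).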