NPUW: Enable PMM for prefill by default (#27057)

### Details:
 - Aligns memory format between prefill and kvcache
 - Recommended to be disabled by default when sharing is in place (e.g., when DQ is applied to both models)

### Tickets:
 - E-143367
openvinotoolkit · Oct 15, 2024 · 4afbcbd · 4afbcbd
1 parent c6801aa
commit 4afbcbd
Showing 1 changed file with 0 additions and 5 deletions.
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
@@ -695,11 +695,6 @@ DQParMMGQ::DQParMMGQ(Context::Ref ctx) {
             return false;
         }
 
-        if (qmmi_shape[1] != 1 && !ctx.get().is_spatial) {
-            // For non 1-token cases, do transformation if and only if and only if the block is spatial
-            return false;
-        }
-
         if (!matmul->get_transpose_a() && !matmul->get_transpose_b()) {
             ctx.get().register_parallel_matmul(node_to_output.at(qmmi), 2, Context::DQParMM{w_param, s_param, matmul});
         } else if (!matmul->get_transpose_a() && matmul->get_transpose_b()) {