diff --git a/llama.cpp b/llama.cpp
index 2df373a..b864b50 160000
--- a/llama.cpp
+++ b/llama.cpp
@@ -1 +1 @@
-Subproject commit 2df373ac40ea581ccca8a58c713f03ad9d4b658d
+Subproject commit b864b50ce5e2beefc8c2fd31733e4e1a978b7754
diff --git a/src/llama_server_context.cc b/src/llama_server_context.cc
index 5940cae..13140b5 100644
--- a/src/llama_server_context.cc
+++ b/src/llama_server_context.cc
@@ -264,11 +264,7 @@ json LlamaServerContext::GetModelProps() {
 }
 
 int LlamaServerContext::RequestCompletion(json data, bool infill,
-                                          bool embedding, int multitask_id) {
-  // From this commit: 'llama : allow pooled embeddings on any model (#7477)'
-  // we need to explicitly set embedding flad for each request
-  llama_set_embeddings(ctx, embedding);
-
+                                          bool embedding, int multitask_id) {
   TaskServer task;
   task.id = id_gen++;
   task.target_id = 0;
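
For context: this patch bumps the `llama.cpp` submodule and drops the per-request `llama_set_embeddings(ctx, embedding)` call that was added for compatibility with llama.cpp commit 'llama : allow pooled embeddings on any model (#7477)'. Below is a minimal sketch of how that per-context toggle behaves in the llama.cpp API, for readers unfamiliar with the removed call. It is an illustration only, not code from this repository; the model path and the standalone `main` scaffold are assumptions.

```cpp
// Sketch: toggling embedding output on a llama.cpp context at runtime,
// per 'llama : allow pooled embeddings on any model (#7477)'.
// "model.gguf" is a hypothetical path, not from this diff.
#include "llama.h"

int main() {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == nullptr) {
        return 1;
    }

    llama_context_params cparams = llama_context_default_params();
    llama_context * ctx = llama_new_context_with_model(model, cparams);

    // Since #7477 the embedding flag is a per-context runtime switch rather
    // than a load-time-only option, so a server can flip it per request:
    llama_set_embeddings(ctx, true);   // subsequent decodes expose embeddings
    // ... handle an embedding request ...
    llama_set_embeddings(ctx, false);  // back to ordinary token generation
    // ... handle a completion request ...

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}
```

Removing the call from `RequestCompletion` suggests the flag is now managed elsewhere (or no longer needs to be set per request) after the submodule bump; the diff itself does not show where, so that remains an inference.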