diff --git a/llama.cpp b/llama.cpp
index 2df373a..b864b50 160000
--- a/llama.cpp
+++ b/llama.cpp
@@ -1 +1 @@
-Subproject commit 2df373ac40ea581ccca8a58c713f03ad9d4b658d
+Subproject commit b864b50ce5e2beefc8c2fd31733e4e1a978b7754
diff --git a/src/llama_server_context.cc b/src/llama_server_context.cc
index 5940cae..13140b5 100644
--- a/src/llama_server_context.cc
+++ b/src/llama_server_context.cc
@@ -264,11 +264,7 @@ json LlamaServerContext::GetModelProps() {
 }
 
 int LlamaServerContext::RequestCompletion(json data, bool infill,
-                                          bool embedding, int multitask_id) {
-  // From this commit: 'llama : allow pooled embeddings on any model (#7477)'
-  // we need to explicitly set embedding flad for each request
-  llama_set_embeddings(ctx, embedding);
-
+                                          bool embedding, int multitask_id) {
   TaskServer task;
   task.id = id_gen++;
   task.target_id = 0;
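
For context: this patch bumps the `llama.cpp` submodule and drops the per-request `llama_set_embeddings(ctx, embedding)` call that was added for compatibility with llama.cpp commit 'llama : allow pooled embeddings on any model (#7477)'. Below is a minimal sketch of how that per-context toggle behaves in the llama.cpp API, for readers unfamiliar with the removed call. It is an illustration only, not code from this repository; the model path and the standalone `main` scaffold are assumptions.

```cpp
// Sketch: toggling embedding output on a llama.cpp context at runtime,
// per 'llama : allow pooled embeddings on any model (#7477)'.
// "model.gguf" is a hypothetical path, not from this diff.
#include "llama.h"

int main() {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == nullptr) {
        return 1;
    }

    llama_context_params cparams = llama_context_default_params();
    llama_context * ctx = llama_new_context_with_model(model, cparams);

    // Since #7477 the embedding flag is a per-context runtime switch rather
    // than a load-time-only option, so a server can flip it per request:
    llama_set_embeddings(ctx, true);   // subsequent decodes expose embeddings
    // ... handle an embedding request ...
    llama_set_embeddings(ctx, false);  // back to ordinary token generation
    // ... handle a completion request ...

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}
```

Removing the call from `RequestCompletion` suggests the flag is now managed elsewhere (or no longer needs to be set per request) after the submodule bump; the diff itself does not show where, so that remains an inference.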