From 7705c6ed3e2402e393c81995a28a66eba00db38d Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Wed, 18 Dec 2024 17:41:01 +0000 Subject: [PATCH] api: use fp32 for base64 embeddings --- aphrodite/endpoints/openai/serving_embedding.py | 4 +++- examples/openai_api/embedding.py | 1 - tests/endpoints/openai/test_embedding.py | 10 +++++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/aphrodite/endpoints/openai/serving_embedding.py b/aphrodite/endpoints/openai/serving_embedding.py index 55afd1b48..644b6a8d7 100644 --- a/aphrodite/endpoints/openai/serving_embedding.py +++ b/aphrodite/endpoints/openai/serving_embedding.py @@ -32,7 +32,9 @@ def request_output_to_embedding_response( prompt_token_ids = final_res.prompt_token_ids embedding = final_res.outputs.embedding if encoding_format == "base64": - embedding_bytes = np.array(embedding).tobytes() + # Force to use float32 for base64 encoding + # to match the OpenAI python client behavior + embedding_bytes = np.array(embedding, dtype="float32").tobytes() embedding = base64.b64encode(embedding_bytes).decode("utf-8") embedding_data = EmbeddingResponseData(index=idx, embedding=embedding) data.append(embedding_data) diff --git a/examples/openai_api/embedding.py b/examples/openai_api/embedding.py index 253f2037e..ec22bd302 100644 --- a/examples/openai_api/embedding.py +++ b/examples/openai_api/embedding.py @@ -19,7 +19,6 @@ "The weather is nice today", ], model=model, - encoding_format="float", ) for data in responses.data: diff --git a/tests/endpoints/openai/test_embedding.py b/tests/endpoints/openai/test_embedding.py index 93d77b580..16b74a69a 100644 --- a/tests/endpoints/openai/test_embedding.py +++ b/tests/endpoints/openai/test_embedding.py @@ -128,9 +128,17 @@ async def test_batch_base64_embedding(embedding_client: openai.AsyncOpenAI, for data in responses_base64.data: decoded_responses_base64_data.append( np.frombuffer(base64.b64decode(data.embedding), - dtype="float").tolist()) + dtype="float32").tolist()) assert responses_float.data[0].embedding == decoded_responses_base64_data[ 0] assert responses_float.data[1].embedding == decoded_responses_base64_data[ 1] + + # Default response is float32 decoded from base64 by OpenAI Client + responses_default = await embedding_client.embeddings.create( + input=input_texts, model=model_name) + assert responses_float.data[0].embedding == responses_default.data[ + 0].embedding + assert responses_float.data[1].embedding == responses_default.data[ + 1].embedding