diff --git a/aphrodite/endpoints/openai/serving_embedding.py b/aphrodite/endpoints/openai/serving_embedding.py
index 55afd1b48..644b6a8d7 100644
--- a/aphrodite/endpoints/openai/serving_embedding.py
+++ b/aphrodite/endpoints/openai/serving_embedding.py
@@ -32,7 +32,9 @@ def request_output_to_embedding_response(
         prompt_token_ids = final_res.prompt_token_ids
         embedding = final_res.outputs.embedding
         if encoding_format == "base64":
-            embedding_bytes = np.array(embedding).tobytes()
+            # Force to use float32 for base64 encoding
+            # to match the OpenAI python client behavior
+            embedding_bytes = np.array(embedding, dtype="float32").tobytes()
             embedding = base64.b64encode(embedding_bytes).decode("utf-8")
         embedding_data = EmbeddingResponseData(index=idx, embedding=embedding)
         data.append(embedding_data)
diff --git a/examples/openai_api/embedding.py b/examples/openai_api/embedding.py
index 253f2037e..ec22bd302 100644
--- a/examples/openai_api/embedding.py
+++ b/examples/openai_api/embedding.py
@@ -19,7 +19,6 @@
         "The weather is nice today",
     ],
     model=model,
-    encoding_format="float",
 )
 
 for data in responses.data:
diff --git a/tests/endpoints/openai/test_embedding.py b/tests/endpoints/openai/test_embedding.py
index 93d77b580..16b74a69a 100644
--- a/tests/endpoints/openai/test_embedding.py
+++ b/tests/endpoints/openai/test_embedding.py
@@ -128,9 +128,17 @@ async def test_batch_base64_embedding(embedding_client: openai.AsyncOpenAI,
     for data in responses_base64.data:
         decoded_responses_base64_data.append(
             np.frombuffer(base64.b64decode(data.embedding),
-                          dtype="float").tolist())
+                          dtype="float32").tolist())
 
     assert responses_float.data[0].embedding == decoded_responses_base64_data[
         0]
     assert responses_float.data[1].embedding == decoded_responses_base64_data[
         1]
+
+    # Default response is float32 decoded from base64 by OpenAI Client
+    responses_default = await embedding_client.embeddings.create(
+        input=input_texts, model=model_name)
+    assert responses_float.data[0].embedding == responses_default.data[
+        0].embedding
+    assert responses_float.data[1].embedding == responses_default.data[
+        1].embedding