Skip to content

Commit

Permalink
api: use fp32 for base64 embeddings
Browse files Browse the repository at this point in the history
  • Loading branch information
AlpinDale committed Dec 18, 2024
1 parent 3b684a8 commit 7705c6e
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 3 deletions.
4 changes: 3 additions & 1 deletion aphrodite/endpoints/openai/serving_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ def request_output_to_embedding_response(
prompt_token_ids = final_res.prompt_token_ids
embedding = final_res.outputs.embedding
if encoding_format == "base64":
embedding_bytes = np.array(embedding).tobytes()
# Force float32 for base64 encoding so the raw bytes match what the
# OpenAI Python client expects when it decodes the payload
embedding_bytes = np.array(embedding, dtype="float32").tobytes()
embedding = base64.b64encode(embedding_bytes).decode("utf-8")
embedding_data = EmbeddingResponseData(index=idx, embedding=embedding)
data.append(embedding_data)
Expand Down
1 change: 0 additions & 1 deletion examples/openai_api/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
"The weather is nice today",
],
model=model,
encoding_format="float",
)

for data in responses.data:
Expand Down
10 changes: 9 additions & 1 deletion tests/endpoints/openai/test_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,17 @@ async def test_batch_base64_embedding(embedding_client: openai.AsyncOpenAI,
for data in responses_base64.data:
decoded_responses_base64_data.append(
np.frombuffer(base64.b64decode(data.embedding),
dtype="float").tolist())
dtype="float32").tolist())

assert responses_float.data[0].embedding == decoded_responses_base64_data[
0]
assert responses_float.data[1].embedding == decoded_responses_base64_data[
1]

# With no encoding_format given, the OpenAI client decodes the base64 response as float32
responses_default = await embedding_client.embeddings.create(
input=input_texts, model=model_name)
assert responses_float.data[0].embedding == responses_default.data[
0].embedding
assert responses_float.data[1].embedding == responses_default.data[
1].embedding

0 comments on commit 7705c6e

Please sign in to comment.