Skip to content

Commit

Permalink
response error logging
Browse files Browse the repository at this point in the history
  • Loading branch information
jstzwj committed Jun 30, 2024
1 parent 2b28af1 commit 0028262
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 19 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -160,4 +160,5 @@ cython_debug/
#.idea/

/.vscode/
/logs/
/logs/
/mteb_results/
40 changes: 40 additions & 0 deletions benchmark/bench_mteb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from typing import Any
import mteb
import numpy as np
import torch
from tqdm import tqdm

# Name of the embedding model requested from the server; the same string is
# reused as the sub-folder name for the MTEB results written by this script.
model_name = "gte-Qwen2-7B-instruct"

import openai

# OpenAI-compatible client pointed at a server listening on localhost:8000.
# NOTE(review): api_key is an empty string — presumably the local endpoint
# does not check credentials; confirm before pointing this at another host.
client = openai.OpenAI(
base_url = "http://127.0.0.1:8000/v1",
api_key="",
)

def batched(data, batch_size):
    """Yield consecutive slices of ``data`` holding at most ``batch_size`` items.

    Args:
        data: A sized, sliceable sequence (list, tuple, str, ...).
        batch_size: Maximum number of items per batch; must be positive.

    Yields:
        ``data[i:i + batch_size]`` for each successive offset ``i``. The last
        batch is shorter when ``len(data)`` is not a multiple of
        ``batch_size``.

    Raises:
        ValueError: If ``batch_size`` is not positive. Because this is a
            generator function, the check runs when iteration starts.
    """
    # A negative step would make range() empty and silently drop every item,
    # so reject non-positive sizes explicitly instead.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
    for start in range(0, len(data), batch_size):
        yield data[start:start + batch_size]

class EmbeddingModel():
    """Model wrapper that obtains embeddings from the module-level ``client``.

    An instance is handed to the MTEB evaluation in this script, which is
    expected to call :meth:`encode` with the texts to embed.
    """

    # Number of sentences sent to the embeddings endpoint per request.
    BATCH_SIZE = 16

    def encode(
        self, sentences: list[str], **kwargs: Any
    ) -> torch.Tensor | np.ndarray:
        """Embed ``sentences`` by calling the embeddings endpoint in batches.

        Args:
            sentences: Texts to embed.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            A NumPy array built from the returned embedding vectors, in the
            order the server returned them across successive batches.
        """
        batch_size = self.BATCH_SIZE
        # Ceiling division so the progress bar total also counts a final
        # partial batch.
        num_batches = (len(sentences) + batch_size - 1) // batch_size
        ret = []
        for sent in tqdm(batched(sentences, batch_size), total=num_batches):
            response = client.embeddings.create(
                model=model_name,
                input=sent,
                encoding_format="float",
            )
            ret.extend(embed_data.embedding for embed_data in response.data)
        # Return an array, as the annotation promises, rather than a plain list.
        return np.asarray(ret)

# Evaluate the HTTP-backed EmbeddingModel on the Banking77Classification MTEB
# task and write the results under mteb_results/<model_name>.
model = EmbeddingModel()
tasks = mteb.get_tasks(tasks=["Banking77Classification"])
evaluation = mteb.MTEB(tasks=tasks)
results = evaluation.run(model, output_folder=f"mteb_results/{model_name}")
21 changes: 11 additions & 10 deletions langport/model/executor/embedding/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,20 +61,21 @@ def __init__(
model_path, device, num_gpus, max_gpu_memory, quantization, cpu_offloading,
deepspeed, gptq, group_size, trust_remote_code, offload_folder
)
if hasattr(self.model, "max_seq_length"):
self._context_len = self.model.max_seq_length
else:
self._context_len = 2048
else:
self.adapter, self.model, self.tokenizer = self.load_model(
model_path, device, num_gpus, max_gpu_memory, quantization, cpu_offloading,
deepspeed, gptq, group_size, trust_remote_code, offload_folder
)

if hasattr(self.model, "max_seq_length"):
self._context_len = self.model.max_seq_length
elif hasattr(self.model.config, "max_sequence_length"):
self._context_len = self.model.config.max_sequence_length
elif hasattr(self.model.config, "max_position_embeddings"):
self._context_len = self.model.config.max_position_embeddings
else:
self._context_len = 2048
if hasattr(self.model.config, "max_sequence_length"):
self._context_len = self.model.config.max_sequence_length
elif hasattr(self.model.config, "max_position_embeddings"):
self._context_len = self.model.config.max_position_embeddings
else:
self._context_len = 2048

def _record_call_time(self):
self.last_call_time = time.time()
Expand Down Expand Up @@ -182,7 +183,7 @@ def inference(self, worker: "EmbeddingModelWorker"):
else:
data = model(**encoded_prompts)
# embeddings = torch.mean(data, dim=1)
embeddings = self._mean_pooling(data, encoded_prompts['attention_mask'])
embeddings = self._mean_pooling(data, encoded_prompts['attention_mask']).cpu()
else:
embeddings = model.encode(prompts)

Expand Down
2 changes: 1 addition & 1 deletion langport/model/executor/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def load_sentence_transformer_model(
kwargs["offload_folder"] = offload_folder

model = SentenceTransformer(model_path, device=device, trust_remote_code=trust_remote_code, model_kwargs=kwargs)
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False, trust_remote_code=trust_remote_code)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=trust_remote_code)
return adapter, model, tokenizer

def load_model(
Expand Down
29 changes: 22 additions & 7 deletions langport/service/gateway/openai_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,35 +59,50 @@ def redirect_model_name(model: str):
break
return model

def check_and_log_response(response):
    """Log an error entry when ``response`` carries a JSON error payload.

    Only ``JSONResponse`` instances are inspected; every other value is
    ignored. The body is parsed as JSON and, when it is an object whose
    ``"object"`` field equals ``"error"``, it is written to the error log:
    formatted from ``object``/``code``/``message`` when both ``code`` and
    ``message`` are present, otherwise as the raw body.

    Args:
        response: The value produced by an API handler.
    """
    if not isinstance(response, JSONResponse):
        return

    response_body = json.loads(response.body)
    # A JSON body may also be a list, string, number or null; only an object
    # can carry the error marker, and this logging helper must not raise on
    # the other shapes.
    if not isinstance(response_body, dict) or response_body.get("object") != "error":
        return

    if "message" in response_body and "code" in response_body:
        logger.error(f"[{response_body['object']}] [{response_body['code']}] - {response_body['message']}")
    else:
        logger.error(response.body)

@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc):
    """Turn a request validation failure into a bad-request response.

    Logs the offending request body, builds the response with
    ``create_bad_request_response`` and passes it through
    ``check_and_log_response`` before returning it.
    """
    # errors="replace" keeps logging from raising inside the error handler
    # when the body is not valid UTF-8.
    logger.error(f"Invalid request: {(await request.body()).decode('utf-8', errors='replace')}")
    response = create_bad_request_response(ErrorCode.VALIDATION_TYPE_ERROR, str(exc))
    check_and_log_response(response)
    return response

@app.get("/v1/models")
async def models():
    """Handle ``GET /v1/models`` by delegating to ``api_models``.

    The result is passed through ``check_and_log_response`` so that error
    payloads are logged before the response is returned.
    """
    response = await api_models(app.app_settings)
    check_and_log_response(response)
    return response

@app.post("/v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest):
    """Handle ``POST /v1/chat/completions``.

    Logs the request once, rewrites ``request.model`` through
    ``redirect_model_name``, delegates to ``api_chat_completions`` and passes
    the result through ``check_and_log_response`` before returning it.
    """
    # Re-serialise with ensure_ascii=False so non-ASCII text is logged
    # unescaped.
    logger.info(json.dumps(json.loads(request.model_dump_json()), ensure_ascii=False))
    request.model = redirect_model_name(request.model)
    response = await api_chat_completions(app.app_settings, request)
    check_and_log_response(response)
    return response

@app.post("/v1/completions")
async def completions(request: CompletionRequest):
    """Handle ``POST /v1/completions``.

    Logs the request once, rewrites ``request.model`` through
    ``redirect_model_name``, delegates to ``api_completions`` and passes the
    result through ``check_and_log_response`` before returning it.
    """
    # Re-serialise with ensure_ascii=False so non-ASCII text is logged
    # unescaped.
    logger.info(json.dumps(json.loads(request.model_dump_json()), ensure_ascii=False))
    request.model = redirect_model_name(request.model)
    response = await api_completions(app.app_settings, request)
    check_and_log_response(response)
    return response

@app.post("/v1/embeddings")
async def embeddings(request: EmbeddingsRequest):
    """Handle ``POST /v1/embeddings``.

    Logs the request once, rewrites ``request.model`` through
    ``redirect_model_name``, delegates to ``api_embeddings`` and passes the
    result through ``check_and_log_response`` before returning it.
    """
    # Re-serialise with ensure_ascii=False so non-ASCII text is logged
    # unescaped.
    logger.info(json.dumps(json.loads(request.model_dump_json()), ensure_ascii=False))
    request.model = redirect_model_name(request.model)
    response = await api_embeddings(app.app_settings, request)
    check_and_log_response(response)
    return response

if __name__ in ["__main__", "langport.service.gateway.openai_api"]:
Expand Down

0 comments on commit 0028262

Please sign in to comment.