From b1ecbedefad8e4dd9bdcc17bd76ad4239b4232cb Mon Sep 17 00:00:00 2001 From: Aisuko Date: Tue, 9 Jul 2024 12:54:24 +0000 Subject: [PATCH] rewrite tokenize instruction Signed-off-by: Aisuko --- .gitignore | 1 + README.md | 1 + backend/src/repository/inference_eng.py | 46 +++++++++++-------------- 3 files changed, 23 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 4c2c103..78fb54c 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ __pycache__/ /lib /lib-types /server +bin/ # Cache .cache diff --git a/README.md b/README.md index e9983aa..15c26c0 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,7 @@ See [deployment](https://skywardai.github.io/skywardai.io/docs/development/build * [SQLAlchemy](https://www.sqlalchemy.org/) * [llama.cpp](https://github.com/ggerganov/llama.cpp) * [HuggingFace](https://huggingface.co) +* [RMIT Race Hub](https://race.rmit.edu.au) # License diff --git a/backend/src/repository/inference_eng.py b/backend/src/repository/inference_eng.py index c1e3ebf..9889774 100644 --- a/backend/src/repository/inference_eng.py +++ b/backend/src/repository/inference_eng.py @@ -15,8 +15,7 @@ # https://pypi.org/project/openai/1.35.5/ import openai -import requests -import loguru +import httpx from src.config.manager import settings @@ -26,10 +25,10 @@ def init(self) -> None: self.infer_eng_url=settings.INFERENCE_ENG self.infer_eng_port=settings.INFERENCE_ENG_PORT self.instruction=settings.INSTRUCTION - # OpenAI-compatible Chat Completions API - self.client=self.openai_client() - self.n_keep=self.get_n_keep() + self.client=self.openai_client() # OpenAI-compatible Chat Completions API self.completion_url=self.instruct_infer_url() + self.tokenization_url=self.instruct_tokenize_url() + self.n_keep=self.get_n_keep() def openai_client(self) -> openai.OpenAI: @@ -42,39 +41,36 @@ def openai_client(self) -> openai.OpenAI: """ url=f'http://{self.infer_eng_url}:{self.infer_eng_port}/v1' api_key='sk-no-key-required' - return 
openai.OpenAI(base_url=url, api_key=api_key) def get_n_keep(self) -> int: """ We get n_keep dynamically for the instruction. - if the return value is 0, no tokens are kept. Returns: int: n_keep - """ - - response = requests.post( - f"http://{inference_helper.infer_eng_url}:{inference_helper.infer_eng_port}/tokenize", - headers={'Content-Type': 'application/json',}, - json={"content": self.instruction} - ) - - if response.status_code != 200: - loguru.logger.error(f"Error in tokenization: {response.text}") - return 0 - - try: - tokenized_instruction = response.json().get('tokens') + with httpx.Client() as client: + res=client.post( + self.tokenization_url, + headers={'Content-Type': 'application/json'}, + json={"content": self.instruction} + ) + res.raise_for_status() + tokenized_instruction = res.json().get('tokens', []) n_keep=len(tokenized_instruction) - except Exception as e: - loguru.logger.error(f"Error in tokenization: {e}") - return 0 - return n_keep + def instruct_tokenize_url(self)->str: + """ + Get the URL for the tokenization engine + + Returns: + str: URL for the tokenization + """ + return f"http://{self.infer_eng_url}:{self.infer_eng_port}/tokenize" + def instruct_infer_url(self)->str: """