Skip to content

Commit

Permalink
rewrite tokenize instruction
Browse files Browse the repository at this point in the history
Signed-off-by: Aisuko <[email protected]>
  • Loading branch information
Aisuko committed Jul 9, 2024
1 parent ca42e41 commit b1ecbed
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 25 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ __pycache__/
/lib
/lib-types
/server
bin/

# Cache
.cache
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ See [deployment](https://skywardai.github.io/skywardai.io/docs/development/build
* [SQLAlchemy](https://www.sqlalchemy.org/)
* [llama.cpp](https://github.com/ggerganov/llama.cpp)
* [HuggingFace](https://huggingface.co)
* [RMIT Race Hub](https://race.rmit.edu.au)


# License
Expand Down
46 changes: 21 additions & 25 deletions backend/src/repository/inference_eng.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@

# https://pypi.org/project/openai/1.35.5/
import openai
import requests
import loguru
import httpx

from src.config.manager import settings

Expand All @@ -26,10 +25,10 @@ def init(self) -> None:
self.infer_eng_url=settings.INFERENCE_ENG
self.infer_eng_port=settings.INFERENCE_ENG_PORT
self.instruction=settings.INSTRUCTION
# OpenAI-compatible Chat Completions API
self.client=self.openai_client()
self.n_keep=self.get_n_keep()
self.client=self.openai_client() # OpenAI-compatible Chat Completions API
self.completion_url=self.instruct_infer_url()
self.tokenization_url=self.instruct_tokenize_url()
self.n_keep=self.get_n_keep()


def openai_client(self) -> openai.OpenAI:
Expand All @@ -42,39 +41,36 @@ def openai_client(self) -> openai.OpenAI:
"""
url=f'http://{self.infer_eng_url}:{self.infer_eng_port}/v1'
api_key='sk-no-key-required'

return openai.OpenAI(base_url=url, api_key=api_key)


def get_n_keep(self) -> int:
    """
    Dynamically compute n_keep for the instruction by asking the
    inference engine's /tokenize endpoint how many tokens the
    instruction occupies.

    Returns:
        int: number of tokens in the instruction, or 0 on any failure
        (meaning no tokens are kept).
    """
    # NOTE(review): assumes self.tokenization_url and self.instruction
    # were set by init() before this is called — confirm call order.
    try:
        with httpx.Client() as client:
            res = client.post(
                self.tokenization_url,
                headers={'Content-Type': 'application/json'},
                json={"content": self.instruction}
            )
            # Raise on non-2xx so every failure mode (connection error,
            # HTTP error, bad/missing 'tokens' field) funnels into the
            # single handler below.
            res.raise_for_status()
            tokenized_instruction = res.json().get('tokens')
            n_keep = len(tokenized_instruction)
    except Exception as e:
        # Best-effort: a tokenization failure must not break startup,
        # so log it and fall back to keeping zero tokens.
        loguru.logger.error(f"Error in tokenization: {e}")
        return 0

    return n_keep

def instruct_tokenize_url(self) -> str:
    """
    Build the URL of the inference engine's /tokenize endpoint.

    Returns:
        str: URL for the tokenization endpoint.
    """
    # Use this instance's own settings rather than the module-level
    # `inference_helper` singleton: the original read
    # inference_helper.infer_eng_url, which silently ignores the
    # instance the method is called on. The sibling code already uses
    # self.* (e.g. get_n_keep posts to self.tokenization_url).
    return f"http://{self.infer_eng_url}:{self.infer_eng_port}/tokenize"


def instruct_infer_url(self)->str:
"""
Expand Down

0 comments on commit b1ecbed

Please sign in to comment.