Skip to content

Commit

Permalink
rewrite tokenize instruction
Browse files Browse the repository at this point in the history
Signed-off-by: Aisuko <[email protected]>
  • Loading branch information
Aisuko committed Jul 9, 2024
1 parent ca42e41 commit b1ecbed
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 25 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ __pycache__/
/lib
/lib-types
/server
bin/

# Cache
.cache
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ See [deployment](https://skywardai.github.io/skywardai.io/docs/development/build
* [SQLAlchemy](https://www.sqlalchemy.org/)
* [llama.cpp](https://github.com/ggerganov/llama.cpp)
* [HuggingFace](https://huggingface.co)
* [RMIT Race Hub](https://race.rmit.edu.au)


# License
Expand Down
46 changes: 21 additions & 25 deletions backend/src/repository/inference_eng.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@

# https://pypi.org/project/openai/1.35.5/
import openai
import requests
import loguru
import httpx

from src.config.manager import settings

Expand All @@ -26,10 +25,10 @@ def init(self) -> None:
self.infer_eng_url=settings.INFERENCE_ENG
self.infer_eng_port=settings.INFERENCE_ENG_PORT
self.instruction=settings.INSTRUCTION
# OpenAI-compatible Chat Completions API
self.client=self.openai_client()
self.n_keep=self.get_n_keep()
self.client=self.openai_client() # OpenAI-compatible Chat Completions API
self.completion_url=self.instruct_infer_url()
self.tokenization_url=self.instruct_tokenize_url()
self.n_keep=self.get_n_keep()


def openai_client(self) -> openai.OpenAI:
Expand All @@ -42,39 +41,36 @@ def openai_client(self) -> openai.OpenAI:
"""
url=f'http://{self.infer_eng_url}:{self.infer_eng_port}/v1'
api_key='sk-no-key-required'

return openai.OpenAI(base_url=url, api_key=api_key)


def get_n_keep(self) -> int:
    """
    Dynamically compute n_keep for the instruction by asking the
    inference engine's /tokenize endpoint how many tokens the
    instruction occupies.

    Returns:
        int: number of tokens in the instruction, or 0 on any failure
        (meaning no tokens are kept).
    """
    # NOTE(review): assumes self.tokenization_url and self.instruction
    # were set by init() before this is called — confirm call order.
    try:
        with httpx.Client() as client:
            res = client.post(
                self.tokenization_url,
                headers={'Content-Type': 'application/json'},
                json={"content": self.instruction}
            )
            # Raise on non-2xx so every failure mode (connection error,
            # HTTP error, bad/missing 'tokens' field) funnels into the
            # single handler below.
            res.raise_for_status()
            tokenized_instruction = res.json().get('tokens')
            n_keep = len(tokenized_instruction)
    except Exception as e:
        # Best-effort: a tokenization failure must not break startup,
        # so log it and fall back to keeping zero tokens.
        loguru.logger.error(f"Error in tokenization: {e}")
        return 0

    return n_keep

def instruct_tokenize_url(self) -> str:
    """
    Build the URL of the inference engine's /tokenize endpoint.

    Returns:
        str: URL for the tokenization endpoint.
    """
    # Use this instance's own settings rather than the module-level
    # `inference_helper` singleton: the original read
    # inference_helper.infer_eng_url, which silently ignores the
    # instance the method is called on. The sibling code already uses
    # self.* (e.g. get_n_keep posts to self.tokenization_url).
    return f"http://{self.infer_eng_url}:{self.infer_eng_port}/tokenize"


def instruct_infer_url(self)->str:
"""
Expand Down

0 comments on commit b1ecbed

Please sign in to comment.