Trying different approach to post-processing output.
Jonathan committed May 28, 2024
1 parent 7558671 commit e4b49fa
Showing 2 changed files with 4 additions and 3 deletions.
5 changes: 3 additions & 2 deletions llm_eval/handler.py
@@ -69,8 +69,9 @@ def load_model_and_tokenizer(self, model_id):
         print(model_id + " loaded.")
         return tokenizer, model
 
-    def post_process_output(self, output):
+    def post_process_output(self, prompt, output):
         """Extracts and returns content based on the predefined pattern from generated output."""
+        output = output[len(prompt)-1:]
         pattern = re.compile(r'\{\s*"(.+?)"\s*:\s*"(.+?)"\s*\}')
         match = re.search(pattern, output)
         return {match.group(1): match.group(2)} if match else output
@@ -108,7 +109,7 @@ def process_dataset(self):
                 if col.endswith('.input'):
                     output_col = col.replace('.input', '.output')
                     prompt, output = self.generate_output(row[col])
-                    output = self.post_process_output(output)
+                    output = self.post_process_output(prompt, output)
                     df.at[index, output_col] = output
             self.unload_model(model_name)
         return df
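
For context, a minimal standalone sketch of what the revised post_process_output does: it drops the echoed prompt from the generated text (note the slice starts at len(prompt)-1, so the prompt's final character is retained) and then extracts the first {"key": "value"} pair. The sample prompt and completion strings below are illustrative, not taken from the repository.

import re

def post_process_output(prompt, output):
    # Drop the echoed prompt; the slice keeps the prompt's last character.
    output = output[len(prompt)-1:]
    # Extract the first {"key": "value"} pair from what remains.
    pattern = re.compile(r'\{\s*"(.+?)"\s*:\s*"(.+?)"\s*\}')
    match = re.search(pattern, output)
    return {match.group(1): match.group(2)} if match else output

# Hypothetical usage: a model that echoes the prompt before its JSON answer.
prompt = 'Label the sentiment of: "Great product!"'
generated = prompt + ' {"sentiment": "positive"}'
print(post_process_output(prompt, generated))  # {'sentiment': 'positive'}

This approach assumes the runtime prepends the prompt verbatim to the generated text; if the model did not echo the prompt, the slice would truncate real output, and the regex fallback would then return the truncated string unchanged.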
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "llm-eval"
-version = "0.5.2"
+version = "0.5.3"
 authors = [
     {name = "Jonathan Eisenzopf", email = "[email protected]"},
 ]
