From e4b49fa5aa4f0f6d927b31428c7f9a3594ae6793 Mon Sep 17 00:00:00 2001
From: Jonathan <eisen@discourse.ai>
Date: Tue, 28 May 2024 02:35:42 -0700
Subject: [PATCH] Trying a different approach to post-processing output.

---
 llm_eval/handler.py | 5 +++--
 pyproject.toml      | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/llm_eval/handler.py b/llm_eval/handler.py
index ecba125..052b73c 100644
--- a/llm_eval/handler.py
+++ b/llm_eval/handler.py
@@ -69,8 +69,9 @@ def load_model_and_tokenizer(self, model_id):
         print(model_id + " loaded.")
         return tokenizer, model
 
-    def post_process_output(self, output):
+    def post_process_output(self, prompt, output):
         """Extracts and returns content based on the predefined pattern from generated output."""
+        output = output[len(prompt)-1:]
         pattern = re.compile(r'\{\s*"(.+?)"\s*:\s*"(.+?)"\s*\}')
         match = re.search(pattern, output)
         return {match.group(1): match.group(2)} if match else output
@@ -108,7 +109,7 @@ def process_dataset(self):
                     if col.endswith('.input'):
                         output_col = col.replace('.input', '.output')
                         prompt, output = self.generate_output(row[col])
-                        output = self.post_process_output(output)
+                        output = self.post_process_output(prompt, output)
                         df.at[index, output_col] = output
             self.unload_model(model_name)
         return df
diff --git a/pyproject.toml b/pyproject.toml
index 82f42a3..21677dc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "llm-eval"
-version = "0.5.2"
+version = "0.5.3"
 authors = [
     {name = "Jonathan Eisenzopf", email = "jonathan.eisenzopf@talkmap.com"},
 ]