From e4b49fa5aa4f0f6d927b31428c7f9a3594ae6793 Mon Sep 17 00:00:00 2001 From: Jonathan Date: Tue, 28 May 2024 02:35:42 -0700 Subject: [PATCH] Trying different approach to post-processing output. --- llm_eval/handler.py | 5 +++-- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llm_eval/handler.py b/llm_eval/handler.py index ecba125..052b73c 100644 --- a/llm_eval/handler.py +++ b/llm_eval/handler.py @@ -69,8 +69,9 @@ def load_model_and_tokenizer(self, model_id): print(model_id + " loaded.") return tokenizer, model - def post_process_output(self, output): + def post_process_output(self, prompt, output): """Extracts and returns content based on the predefined pattern from generated output.""" + output = output[len(prompt)-1:] pattern = re.compile(r'\{\s*"(.+?)"\s*:\s*"(.+?)"\s*\}') match = re.search(pattern, output) return {match.group(1): match.group(2)} if match else output @@ -108,7 +109,7 @@ def process_dataset(self): if col.endswith('.input'): output_col = col.replace('.input', '.output') prompt, output = self.generate_output(row[col]) - output = self.post_process_output(output) + output = self.post_process_output(prompt, output) df.at[index, output_col] = output self.unload_model(model_name) return df diff --git a/pyproject.toml b/pyproject.toml index 82f42a3..21677dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" [project] name = "llm-eval" -version = "0.5.2" +version = "0.5.3" authors = [ {name = "Jonathan Eisenzopf", email = "jonathan.eisenzopf@talkmap.com"}, ]