Trying different approach to post-processing output.
Jonathan committed May 28, 2024
1 parent 7558671 commit e4b49fa
Showing 2 changed files with 4 additions and 3 deletions.
5 changes: 3 additions & 2 deletions llm_eval/handler.py
@@ -69,8 +69,9 @@ def load_model_and_tokenizer(self, model_id):
         print(model_id + " loaded.")
         return tokenizer, model
 
-    def post_process_output(self, output):
+    def post_process_output(self, prompt, output):
         """Extracts and returns content based on the predefined pattern from generated output."""
+        output = output[len(prompt)-1:]
         pattern = re.compile(r'\{\s*"(.+?)"\s*:\s*"(.+?)"\s*\}')
         match = re.search(pattern, output)
         return {match.group(1): match.group(2)} if match else output
@@ -108,7 +109,7 @@ def process_dataset(self):
                 if col.endswith('.input'):
                     output_col = col.replace('.input', '.output')
                     prompt, output = self.generate_output(row[col])
-                    output = self.post_process_output(output)
+                    output = self.post_process_output(prompt, output)
                     df.at[index, output_col] = output
             self.unload_model(model_name)
         return df
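
For context, a minimal standalone sketch of what the revised post_process_output does: it drops the echoed prompt from the generated text (note the slice starts at len(prompt)-1, so the prompt's final character is retained) and then extracts the first {"key": "value"} pair. The sample prompt and completion strings below are illustrative, not taken from the repository.

import re

def post_process_output(prompt, output):
    # Drop the echoed prompt; the slice keeps the prompt's last character.
    output = output[len(prompt)-1:]
    # Extract the first {"key": "value"} pair from what remains.
    pattern = re.compile(r'\{\s*"(.+?)"\s*:\s*"(.+?)"\s*\}')
    match = re.search(pattern, output)
    return {match.group(1): match.group(2)} if match else output

# Hypothetical usage: a model that echoes the prompt before its JSON answer.
prompt = 'Label the sentiment of: "Great product!"'
generated = prompt + ' {"sentiment": "positive"}'
print(post_process_output(prompt, generated))  # {'sentiment': 'positive'}

This approach assumes the runtime prepends the prompt verbatim to the generated text; if the model did not echo the prompt, the slice would truncate real output, and the regex fallback would then return the truncated string unchanged.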
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "llm-eval"
-version = "0.5.2"
+version = "0.5.3"
 authors = [
     {name = "Jonathan Eisenzopf", email = "[email protected]"},
 ]
