main.py
import argparse
import ast

import numpy as np
import pandas as pd
from sklearn.metrics import precision_recall_curve, matthews_corrcoef
from sklearn.metrics.pairwise import cosine_similarity

VAL_DIRECTIONS = ['deu_Latn_eng_Latn', 'eng_Latn_deu_Latn']


# Functions
def prepare_data(input_path, load_embed, embed_methods):
    """Load and prepare the dataset, splitting it into validation and test sets."""
    if load_embed:
        # Load pre-computed embeddings; the embedding columns are stored as
        # stringified lists, so parse them with ast.literal_eval.
        embed_cols = ([f'{method}_src_embeds' for method in embed_methods]
                      + [f'{method}_mt_embeds' for method in embed_methods])
        converters = {col: ast.literal_eval for col in embed_cols}
        df = pd.read_csv(input_path, converters=converters)
    else:
        # TODO: generate embeddings
        df = pd.read_csv(input_path, sep="\t")
    # Keep only the unperturbed ('natural') examples.
    df = df[df.perturbation == 'natural'].reset_index(drop=True)
    # Binary hallucination label: class codes starting with '1' map to 0, all others to 1.
    df['binary_hall'] = df['class_hall'].apply(lambda x: 0 if str(x).startswith('1') else 1)
    # German<->English directions form the validation split; all other directions form the test split.
    df_val = df[df.direction.isin(VAL_DIRECTIONS)].reset_index(drop=True)
    df_test = df[~df.direction.isin(VAL_DIRECTIONS)].reset_index(drop=True)
    return df_val, df_test


def calculate_similarity(df, methods):
    """Calculate cosine similarity scores between source and translated texts for each embedding space."""
    for method in methods:
        # Negate the similarity so that higher scores correspond to more dissimilar
        # (and thus more likely hallucinated) translations.
        df[f'score_{method}_cos'] = df.apply(
            lambda x: -cosine_similarity([x[f'{method}_src_embeds']],
                                         [x[f'{method}_mt_embeds']])[0][0],
            axis=1
        )
    return df


def binarize_scores(df_val, df_test, methods):
    """Binarize test-set scores using the threshold that maximizes F1 on the validation set."""
    for method in methods:
        # Identify the optimal threshold via the F1 score on the validation set.
        y_true = df_val['binary_hall']
        y_score = df_val[f'score_{method}_cos']
        precision, recall, thresholds = precision_recall_curve(y_true, y_score)
        # precision/recall have one more entry than thresholds, so drop the last point;
        # also guard against division by zero where precision + recall == 0.
        fscore = np.divide(2 * precision * recall, precision + recall,
                           out=np.zeros_like(precision), where=(precision + recall) > 0)
        optimal_thresh = thresholds[np.argmax(fscore[:-1])]
        # Binarize the test-set cosine scores with the selected threshold.
        df_test[f'bin_{method}_cos'] = df_test[f'score_{method}_cos'].apply(
            lambda x: 0 if x < optimal_thresh else 1
        )
    return df_test


# Main function
def main(args):
    # Load and prepare data
    df_val, df_test = prepare_data(args.input, args.load_embed, args.embed_methods)

    # ---------- Embeddings ----------
    # Calculate similarity scores
    df_val = calculate_similarity(df_val, args.embed_methods)
    df_test = calculate_similarity(df_test, args.embed_methods)

    # Binarize scores based on the optimal validation threshold
    df_test = binarize_scores(df_val, df_test, args.embed_methods)

    # ---------- Evaluate ----------
    df_eval = df_test.groupby("direction", group_keys=True).apply(
        lambda x: pd.Series(
            {method: matthews_corrcoef(x['binary_hall'], x[f'bin_{method}_cos'])
             for method in args.embed_methods}
        )
    )
    print("Evaluation results:")
    print(df_eval)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="MT Hallucination Detection")
    parser.add_argument('--input', type=str, required=True,
                        help='Path to the input file')
    parser.add_argument('--load_embed', action='store_true',
                        help='Load pre-computed sentence embeddings if set, otherwise generate new ones')
    parser.add_argument('--embed_methods', nargs='+', required=False,
                        choices=['sonar', 'gpt', 'cohere', 'mistral'],
                        help='Embedding methods to evaluate')
    args = parser.parse_args()
    main(args)
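
# Example invocation (a sketch; the input filename is hypothetical, while the flags and
# embedding-method choices are those defined above). With --load_embed, the CSV is assumed
# to already contain the <method>_src_embeds / <method>_mt_embeds columns:
#
#   python main.py --input halomi_with_embeds.csv --load_embed --embed_methods sonar cohere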