-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdemo03_eval.py
71 lines (56 loc) · 2.44 KB
/
demo03_eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from demo00_conf import *
import matplotlib.pyplot as plt
plt.switch_backend("agg")
from phaser.evaluation import ComputeMetrics, make_bit_weights
from phaser.similarities import IntraDistance, InterDistance, find_inter_samplesize
from phaser.plotting import bit_weights_ax
print("Running script.")

# Load the three artefacts this script works on: the label encoders, the
# hashes dataframe and the distances dataframe.
# NOTE(review): `load` is not defined in this file; presumably it comes from
# the `demo00_conf` star import (likely joblib.load given the .bz2 suffix) —
# confirm, and only point it at files produced by trusted runs if it unpickles.
le = load("./demo_outputs/LabelEncoders.bz2")
df_h = load("./demo_outputs/Hashes.df.bz2")
df_d = load("./demo_outputs/Distances.df.bz2")

# The inter-distance sample size is derived from the number of distinct
# filenames. Only the length of unique() is needed, so no arithmetic is
# applied to its elements (the former `* 1` was a no-op at best and raises
# for dtypes that do not support multiplication).
n_samples = find_inter_samplesize(len(df_h["filename"].unique()))
# Build every combination of the "a", "t" and "m" encoder classes, skipping
# the "orig" entry of the "t" encoder. Result: one row per combination, three
# columns.
# NOTE(review): "a"/"t"/"m" presumably mean algorithm/transform/metric — confirm.
_t_classes_no_orig = [name for name in le["t"].classes_ if name != "orig"]
_class_grid = np.meshgrid(le["a"].classes_, _t_classes_no_orig, le["m"].classes_)
triplets = np.array(_class_grid).T.reshape(-1, 3)
# Evaluate all triplets on the loaded (unweighted) distances. fit() returns
# the metrics table `m` and a second object `b`, which later feeds the bit
# plots and make_bit_weights().
print(f"Number of triplets to analyse: {len(triplets)}")
cm = ComputeMetrics(le, df_d, df_h, analyse_bits=True, n_jobs=1)
m, b = cm.fit(triplets=triplets)

# Per-algorithm mean and standard deviation of AUC and EER, then the full table.
print("Performance without bit weights:")
_auc_eer_summary = m.groupby(["Algorithm"])[["AUC", "EER"]].agg(["mean", "std"])
print(_auc_eer_summary)
print(m)
# Plot the bit frequency for each triplet ignoring 'orig'
import os

# savefig() does not create missing directories, so make sure the figure
# folder exists before the first plot is written.
os.makedirs("./demo_outputs/figs", exist_ok=True)

print("Plotting bit weights for each triplets")
for triplet, triplet_bits in b.items():
    fig, ax = plt.subplots(1, 1, figsize=(5, 1.5), constrained_layout=True)
    bit_weights_ax(triplet_bits, ax=ax)
    fig.savefig(f"./demo_outputs/figs/03-bit_analysis_{triplet}.png")
    # Close this exact figure so open figures do not pile up across iterations.
    plt.close(fig)
# Create bit_weights (algo,metric)
weights = make_bit_weights(b, le)

# Plot the applied bitweights for the pairs (algo,metric): one PNG per key
# of `weights`.
for pair, pair_weights in weights.items():
    fig, ax = plt.subplots(1, 1, figsize=(5, 1.5), constrained_layout=True)
    # The weights are reshaped to a single column before plotting.
    bit_weights_ax(pair_weights.reshape(-1, 1), ax=ax)
    fig.savefig(f"./demo_outputs/figs/03-bit_weights_{pair}.png")
    # Close this exact figure rather than whichever one pyplot considers current.
    plt.close(fig)
# Recompute intra- and inter-distances from the hashes with the bit weights
# applied, then stack both results into a single distances dataframe.
# NOTE(review): the third positional argument (1 for intra, 0 for inter) is
# passed through unchanged; its meaning is defined by phaser — confirm.
_intra_calc = IntraDistance(METR_dict, le, 1, weights, progress_bar=True)
intra_df_w = _intra_calc.fit(df_h)
_inter_calc = InterDistance(METR_dict, le, 0, weights, n_samples, progress_bar=True)
inter_df_w = _inter_calc.fit(df_h)
df_d_w = pd.concat([intra_df_w, inter_df_w])

# Evaluate the same triplets on the weighted distances; the second value
# returned by fit() is discarded here.
cm_w = ComputeMetrics(le, df_d_w, df_h, analyse_bits=False, n_jobs=1)
m_w, _ = cm_w.fit(triplets=triplets)

print("Performance with bit weights:")
_auc_eer_summary_w = m_w.groupby(["Algorithm"])[["AUC", "EER"]].agg(["mean", "std"])
print(_auc_eer_summary_w)
print(m_w)
# Compare AUC without and with bit weights (metric "Hamming") and save the
# resulting figure.
from phaser.plotting import auc_cmp_fig

_auc_cmp_path = "./demo_outputs/figs/03_auc_cmp_w_without_weights.png"
fig = auc_cmp_fig(m, m_w, metric="Hamming")
fig.savefig(_auc_cmp_path)
plt.close()

print("Script finished")