-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path2_AUCROC.py
75 lines (67 loc) · 3.08 KB
/
2_AUCROC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import pandas as pd
from scipy.stats import ttest_ind
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns
# Load the docking scores. The rest of the script reads a 'Name' column and
# the 'Vina', 'PLANET' and 'Autodock-gpu' score columns from this sheet.
file_path = "/home/szy/proj/crf/LCK_VS_anal/dockingscore.xlsx"  # Please adjust the path if needed
dockingscore_data = pd.read_excel(file_path)

# Split data into active and inactive by name prefix: names starting with 'P'
# are treated as active, names starting with 'C' as inactive.
# na=False puts rows with a missing Name into neither group; without it the
# mask can contain NaN, which boolean indexing rejects.
active_data = dockingscore_data[dockingscore_data['Name'].str.startswith('P', na=False)]
inactive_data = dockingscore_data[dockingscore_data['Name'].str.startswith('C', na=False)]
# Two-sample t-test (active vs. inactive scores) for each docking program.
# The formatted summary is printed and kept in `ttest_results`, keyed by
# program name, so the plots further down can annotate themselves with it.
ttest_results = {}
for software in ('Vina', 'PLANET', 'Autodock-gpu'):
    outcome = ttest_ind(active_data[software], inactive_data[software])
    t_stat = outcome.statistic
    p_val = outcome.pvalue
    summary = f"{software}: t-statistic = {t_stat:.2f}, p-value = {format(p_val, '.5e')}"
    ttest_results[software] = summary
    print(summary)
# Define contrasting colors and line styles for the ROC curves, keyed by the
# score column of each docking program.
colors = {
    'Vina': 'blue',
    'PLANET': 'green',
    'Autodock-gpu': 'red',
}
line_styles = {
    'Vina': '--',
    'PLANET': '-',
    'Autodock-gpu': '-.',
}

# Binary ground truth: 1 for names starting with 'P', 0 for everything else.
# The vectorised form with na=False labels a missing Name as 0 instead of
# failing on a per-row .startswith() call against a non-string value.
dockingscore_data['Label'] = (
    dockingscore_data['Name'].str.startswith('P', na=False).astype(int)
)

# Generate ROC curves: one curve per docking program on a shared figure.
plt.figure(figsize=(10, 7))
for software in colors:
    # roc_curve treats larger scores as "more likely positive", so the scores
    # are negated (lower docking scores usually indicate better binding).
    # NOTE(review): confirm every score column is lower-is-better; a program
    # that reports a higher-is-better affinity should not be negated.
    fpr, tpr, _ = roc_curve(dockingscore_data['Label'], -dockingscore_data[software])
    roc_auc = auc(fpr, tpr)
    plt.plot(
        fpr,
        tpr,
        color=colors[software],
        linestyle=line_styles[software],
        label=f'{software} (AUC = {roc_auc:.2f})',
    )

# Diagonal reference line for a random classifier.
plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier (AUC = 0.50)')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves for Docking Software with Contrasting Colors and Line Styles')
plt.legend(loc="lower right")
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.tight_layout()
plt.show()
# Boxplots: one panel per docking program, scores grouped by the 0/1 Label.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for ax, software in zip(axes, ['Vina', 'PLANET', 'Autodock-gpu']):
    # order=[0, 1] pins Label 0 to the first position and Label 1 to the
    # second, so the two tick labels below never depend on which categories
    # happen to be present in the data or on their implicit ordering.
    sns.boxplot(
        x=dockingscore_data['Label'],
        y=dockingscore_data[software],
        order=[0, 1],
        ax=ax,
        palette="Set2",
    )
    ax.set_title(f'Boxplot for {software}')
    ax.set_xlabel('Activity')
    ax.set_xticks([0, 1])
    ax.set_xticklabels(['Inactive', 'Active'])
    ax.set_ylabel('Docking Score')
    # Add t-test results to the plot, centred just below the axes
    # (coordinates are in axes fractions via transAxes).
    ax.text(
        0.5,
        -0.15,
        ttest_results[software],
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        fontsize=10,
        color='black',
    )
plt.tight_layout()
plt.show()
# Violin plots: one panel per docking program, scores grouped by the 0/1
# Label, with quartile lines drawn inside each violin.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for ax, software in zip(axes, ['Vina', 'PLANET', 'Autodock-gpu']):
    # order=[0, 1] pins Label 0 to the first position and Label 1 to the
    # second, so the two tick labels below never depend on which categories
    # happen to be present in the data or on their implicit ordering.
    sns.violinplot(
        x=dockingscore_data['Label'],
        y=dockingscore_data[software],
        order=[0, 1],
        ax=ax,
        palette="Set2",
        inner="quartile",
    )
    ax.set_title(f'Violin plot for {software}')
    ax.set_xlabel('Activity')
    ax.set_xticks([0, 1])
    ax.set_xticklabels(['Inactive', 'Active'])
    ax.set_ylabel('Docking Score')
    # Add t-test results to the plot, centred just below the axes
    # (coordinates are in axes fractions via transAxes).
    ax.text(
        0.5,
        -0.15,
        ttest_results[software],
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        fontsize=10,
        color='black',
    )
plt.tight_layout()
plt.show()