Merge pull request #163 from ljchang/roc
Roc

Former-commit-id: 529d9a3
ljchang authored Nov 6, 2017
2 parents a6054a8 + 4a386db commit 39004d3
Showing 1 changed file with 35 additions and 25 deletions.
nltools/analysis.py
@@ -49,7 +49,7 @@ def __init__(self, input_values=None, binary_outcome=None,
if not any(binary_outcome):
raise ValueError("Data Problem: binary_outcome may not be boolean")

-thr_type = ['optimal_overall', 'optimal_balanced','minimum_sdt_bias']
+thr_type = ['optimal_overall', 'optimal_balanced', 'minimum_sdt_bias']
if threshold_type not in thr_type:
raise ValueError("threshold_type must be ['optimal_overall', "
"'optimal_balanced','minimum_sdt_bias']")
@@ -58,8 +58,7 @@ def __init__(self, input_values=None, binary_outcome=None,
self.binary_outcome = deepcopy(binary_outcome)
self.threshold_type = deepcopy(threshold_type)
self.forced_choice = deepcopy(forced_choice)
-
-if isinstance(self.binary_outcome,pd.DataFrame):
+if isinstance(self.binary_outcome, pd.DataFrame):
self.binary_outcome = np.array(self.binary_outcome).flatten()
else:
self.binary_outcome = deepcopy(binary_outcome)
@@ -81,7 +80,9 @@ def calculate(self, input_values=None, binary_outcome=None,
forced_choice: index indicating position for each unique subject
(default=None)
balanced_acc: balanced accuracy for single-interval classification
-(bool)
+(bool). THIS IS NOT COMPLETELY IMPLEMENTED BECAUSE
+IT AFFECTS ACCURACY ESTIMATES, BUT NOT P-VALUES OR
+THRESHOLD AT WHICH TO EVALUATE SENS/SPEC
**kwargs: Additional keyword arguments to pass to the prediction
algorithm
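
The threshold_type options validated earlier and the balanced_acc flag documented above come together in ordinary use. A minimal usage sketch with hypothetical data, assuming only the Roc constructor and calculate signature shown in this diff:

import numpy as np
from nltools.analysis import Roc

scores = np.concatenate([np.random.normal(1, 1, 50),    # hypothetical classifier scores: positives first,
                         np.random.normal(0, 1, 50)])   # then negatives
outcome = np.repeat([True, False], 50)

roc = Roc(input_values=scores, binary_outcome=outcome,
          threshold_type='optimal_overall')   # or 'optimal_balanced', 'minimum_sdt_bias'
roc.calculate(balanced_acc=False)             # see the caveat above before relying on balanced_acc=True
print(roc.class_thr, roc.accuracy)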
@@ -111,15 +112,15 @@ def calculate(self, input_values=None, binary_outcome=None,
assert len(set(sub_idx).union(set(np.array(self.forced_choice)[self.binary_outcome]))) == len(sub_idx), "Issue with forced_choice subject labels."
assert len(set(sub_idx).union(set(np.array(self.forced_choice)[~self.binary_outcome]))) == len(sub_idx), "Issue with forced_choice subject labels."
for sub in sub_idx:
-sub_mn = (self.input_values[(self.forced_choice==sub) & (self.binary_outcome==True)]+self.input_values[(self.forced_choice==sub) & (self.binary_outcome==False)])[0]/2
-self.input_values[(self.forced_choice==sub) & (self.binary_outcome==True)] = self.input_values[(self.forced_choice==sub) & (self.binary_outcome==True)][0] - sub_mn
-self.input_values[(self.forced_choice==sub) & (self.binary_outcome==False)] = self.input_values[(self.forced_choice==sub) & (self.binary_outcome==False)][0] - sub_mn
+sub_mn = (self.input_values[(self.forced_choice == sub) & (self.binary_outcome == True)]+self.input_values[(self.forced_choice == sub) & (self.binary_outcome == False)])[0]/2
+self.input_values[(self.forced_choice == sub) & (self.binary_outcome == True)] = self.input_values[(self.forced_choice == sub) & (self.binary_outcome == True)][0] - sub_mn
+self.input_values[(self.forced_choice == sub) & (self.binary_outcome == False)] = self.input_values[(self.forced_choice == sub) & (self.binary_outcome == False)][0] - sub_mn
self.class_thr = 0;

# Calculate true positive and false positive rate
self.tpr = np.zeros(self.criterion_values.shape)
self.fpr = np.zeros(self.criterion_values.shape)
-for i,x in enumerate(self.criterion_values):
+for i, x in enumerate(self.criterion_values):
wh = self.input_values >= x
self.tpr[i] = np.sum(wh[self.binary_outcome])/np.sum(self.binary_outcome)
self.fpr[i] = np.sum(wh[~self.binary_outcome])/np.sum(~self.binary_outcome)
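
In the forced-choice branch above, each subject contributes one score under each outcome, and the pair is mean-centered so that a fixed threshold of zero separates the two. The same centering on toy arrays, as a standalone sketch with hypothetical data (not the nltools API):

import numpy as np

scores = np.array([0.9, 0.4, 0.7, 0.3])          # one score per condition per subject
outcome = np.array([True, False, True, False])   # which condition each score came from
subject = np.array([0, 0, 1, 1])                 # forced_choice-style subject labels

for sub in np.unique(subject):
    pos = (subject == sub) & outcome
    neg = (subject == sub) & ~outcome
    sub_mn = (scores[pos][0] + scores[neg][0]) / 2   # midpoint of this subject's pair
    scores[pos] = scores[pos][0] - sub_mn
    scores[neg] = scores[neg][0] - sub_mn

# After centering, the pairs become (0.25, -0.25) and (0.2, -0.2); with the
# threshold fixed at 0, a pair is correct when its true-condition score is positive.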
@@ -135,12 +136,12 @@ def calculate(self, input_values=None, binary_outcome=None,
self.class_thr = self.criterion_values[np.argmax(mn)]
elif threshold_type == 'optimal_overall':
n_corr_t = self.tpr*self.n_true
-n_corr_f = (1-self.fpr)*self.n_false
-sm = (n_corr_t+n_corr_f)
+n_corr_f = (1 - self.fpr)*self.n_false
+sm = (n_corr_t + n_corr_f)
self.class_thr = self.criterion_values[np.argmax(sm)]
elif threshold_type == 'minimum_sdt_bias':
# Calculate MacMillan and Creelman 2005 Response Bias (c_bias)
-c_bias = ( norm.ppf(np.maximum(.0001, np.minimum(0.9999, self.tpr))) + norm.ppf(np.maximum(.0001, np.minimum(0.9999, self.fpr))) ) / float(2)
+c_bias = (norm.ppf(np.maximum(.0001, np.minimum(0.9999, self.tpr))) + norm.ppf(np.maximum(.0001, np.minimum(0.9999, self.fpr)))) / float(2)
self.class_thr = self.criterion_values[np.argmin(abs(c_bias))]

# Calculate output
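
The 'minimum_sdt_bias' option selects the criterion whose signal-detection response bias is closest to zero, clipping hit and false-alarm rates away from 0 and 1 so norm.ppf stays finite. A standalone sketch of that rule on toy curves, mirroring the c_bias line above (Macmillan & Creelman, 2005: c = (z(HR) + z(FAR)) / 2):

import numpy as np
from scipy.stats import norm

criterion_values = np.linspace(-2, 2, 9)                              # candidate thresholds
tpr = np.clip(1 - norm.cdf(criterion_values, 1, 1), .0001, .9999)    # toy hit rates
fpr = np.clip(1 - norm.cdf(criterion_values, -1, 1), .0001, .9999)   # toy false-alarm rates

c_bias = (norm.ppf(tpr) + norm.ppf(fpr)) / 2      # response bias at each criterion
class_thr = criterion_values[np.argmin(np.abs(c_bias))]
print(class_thr)   # 0.0 for these symmetric toy distributions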
@@ -161,7 +162,7 @@ def calculate(self, input_values=None, binary_outcome=None,

# Calculate Accuracy
if balanced_acc:
-self.accuracy = np.mean([self.sensitivity,self.specificity]) #See Brodersen, Ong, Stephan, Buhmann (2010)
+self.accuracy = np.mean([self.sensitivity, self.specificity]) #See Brodersen, Ong, Stephan, Buhmann (2010)
else:
self.accuracy = 1 - np.mean(self.misclass)

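Balanced accuracy (Brodersen, Ong, Stephan, & Buhmann, 2010) is the unweighted mean of sensitivity and specificity, so it is not inflated by class imbalance the way raw accuracy is. A toy comparison with hypothetical numbers:

import numpy as np

n_true, n_false = 90, 10                  # strongly imbalanced classes
sensitivity, specificity = 0.95, 0.50

plain_acc = (sensitivity * n_true + specificity * n_false) / (n_true + n_false)
balanced_acc = np.mean([sensitivity, specificity])
print(plain_acc, balanced_acc)            # 0.905 vs. 0.725
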
@@ -171,7 +172,7 @@ def calculate(self, input_values=None, binary_outcome=None,
self.accuracy_se = np.sqrt(np.mean(~self.misclass) * (np.mean(~self.misclass)) / self.n)


-def plot(self, plot_method = 'gaussian'):
+def plot(self, plot_method = 'gaussian', balanced_acc=False, **kwargs):
""" Create ROC Plot
Create a specific kind of ROC curve plot, based on input values
@@ -188,38 +189,47 @@ def plot(self, plot_method = 'gaussian'):
"""

-self.calculate() # Calculate ROC parameters
+self.calculate(balanced_acc=balanced_acc) # Calculate ROC parameters


if plot_method == 'gaussian':
if self.forced_choice is not None:
sub_idx = np.unique(self.forced_choice)
diff_scores = []
for sub in sub_idx:
-diff_scores.append(self.input_values[(self.forced_choice == sub) & (self.binary_outcome==True)][0] - self.input_values[(self.forced_choice==sub) & (self.binary_outcome==False)][0])
+diff_scores.append(self.input_values[(self.forced_choice == sub) & (self.binary_outcome == True)][0] - self.input_values[(self.forced_choice == sub) & (self.binary_outcome == False)][0])
diff_scores = np.array(diff_scores)
mn_diff = np.mean(diff_scores)
d = mn_diff / np.std(diff_scores)
-pooled_sd = np.std(diff_scores) / np.sqrt(2);
+pooled_sd = np.std(diff_scores) / np.sqrt(2)
d_a_model = mn_diff / pooled_sd

-x = np.arange(-3,3,.1)
-tpr_smooth = 1 - norm.cdf(x, d, 1)
-fpr_smooth = 1 - norm.cdf(x, -d, 1)
+expected_acc = 1 - norm.cdf(0, d, 1)
+self.sensitivity = expected_acc
+self.specificity = expected_acc
+self.ppv = self.sensitivity / (self.sensitivity +
+                               1 - self.specificity)
+self.auc = norm.cdf(d_a_model / np.sqrt(2))
+
+x = np.arange(-3, 3, .1)
+self.tpr_smooth = 1 - norm.cdf(x, d, 1)
+self.fpr_smooth = 1 - norm.cdf(x, -d, 1)
else:
mn_true = np.mean(self.input_values[self.binary_outcome])
mn_false = np.mean(self.input_values[~self.binary_outcome])
var_true = np.var(self.input_values[self.binary_outcome])
var_false = np.var(self.input_values[~self.binary_outcome])
-pooled_sd = np.sqrt((var_true*(self.n_true-1))/(self.n_true + self.n_false - 2))
-d = (mn_true-mn_false)/pooled_sd
+pooled_sd = np.sqrt((var_true*(self.n_true - 1))/(self.n_true + self.n_false - 2))
+d = (mn_true - mn_false)/pooled_sd
z_true = mn_true/pooled_sd
z_false = mn_false/pooled_sd

-x = np.arange(z_false-3,z_true+3,.1)
-tpr_smooth = 1-(norm.cdf(x, z_true,1))
-fpr_smooth = 1-(norm.cdf(x, z_false,1))
+x = np.arange(z_false-3, z_true+3, .1)
+self.tpr_smooth = 1 - (norm.cdf(x, z_true, 1))
+self.fpr_smooth = 1 - (norm.cdf(x, z_false, 1))

-fig = roc_plot(fpr_smooth,tpr_smooth)
+self.aucn = auc(self.fpr_smooth, self.tpr_smooth)
+fig = roc_plot(self.fpr_smooth, self.tpr_smooth)

elif plot_method == 'observed':
fig = roc_plot(self.fpr, self.tpr)
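
The 'gaussian' path fits equal-variance normal distributions to the two classes and draws the ROC implied by the effect size d, with AUC given by norm.cdf(d / sqrt(2)). One detail worth flagging: the non-forced-choice branch above pools only var_true into pooled_sd, whereas the textbook two-sample pooled SD combines both variances. The standalone sketch below uses the two-variance form, with simulated scores (not the nltools API):

import numpy as np
from scipy.stats import norm

true_scores = np.random.normal(1.0, 1.0, 200)    # simulated scores for the true class
false_scores = np.random.normal(0.0, 1.0, 200)   # and for the false class

n_t, n_f = len(true_scores), len(false_scores)
var_t, var_f = np.var(true_scores), np.var(false_scores)
pooled_sd = np.sqrt((var_t * (n_t - 1) + var_f * (n_f - 1)) / (n_t + n_f - 2))
d = (np.mean(true_scores) - np.mean(false_scores)) / pooled_sd   # effect size d

x = np.arange(-3, 3, .1)                  # sweep of decision criteria
tpr_smooth = 1 - norm.cdf(x, d, 1)        # model-implied hit rate
fpr_smooth = 1 - norm.cdf(x, 0, 1)        # model-implied false-alarm rate
auc = norm.cdf(d / np.sqrt(2))            # AUC under the equal-variance Gaussian model
print(auc)                                # about 0.76 when d is near 1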
