From e3d0f2b66ea9f2195892869ec0734a8e687b269c Mon Sep 17 00:00:00 2001
From: Luke Chang
Date: Wed, 16 Aug 2017 06:55:24 -0400
Subject: [PATCH 1/2] updated roc to store gaussian weights

Former-commit-id: 05b1f6525a53338c878fd975742983ab714ef0ec
---
 nltools/analysis.py | 48 ++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/nltools/analysis.py b/nltools/analysis.py
index 86d3a540..67947972 100644
--- a/nltools/analysis.py
+++ b/nltools/analysis.py
@@ -49,7 +49,7 @@ def __init__(self, input_values=None, binary_outcome=None,
         if not any(binary_outcome):
             raise ValueError("Data Problem: binary_outcome may not be boolean")
 
-        thr_type = ['optimal_overall', 'optimal_balanced','minimum_sdt_bias']
+        thr_type = ['optimal_overall', 'optimal_balanced', 'minimum_sdt_bias']
         if threshold_type not in thr_type:
             raise ValueError("threshold_type must be ['optimal_overall', "
                              "'optimal_balanced','minimum_sdt_bias']")
@@ -58,8 +58,7 @@ def __init__(self, input_values=None, binary_outcome=None,
         self.binary_outcome = deepcopy(binary_outcome)
         self.threshold_type = deepcopy(threshold_type)
         self.forced_choice = deepcopy(forced_choice)
-
-        if isinstance(self.binary_outcome,pd.DataFrame):
+        if isinstance(self.binary_outcome, pd.DataFrame):
             self.binary_outcome = np.array(self.binary_outcome).flatten()
         else:
             self.binary_outcome = deepcopy(binary_outcome)
@@ -111,15 +110,15 @@ def calculate(self, input_values=None, binary_outcome=None,
             assert len(set(sub_idx).union(set(np.array(self.forced_choice)[self.binary_outcome]))) == len(sub_idx), "Issue with forced_choice subject labels."
             assert len(set(sub_idx).union(set(np.array(self.forced_choice)[~self.binary_outcome]))) == len(sub_idx), "Issue with forced_choice subject labels."
             for sub in sub_idx:
-                sub_mn = (self.input_values[(self.forced_choice==sub) & (self.binary_outcome==True)]+self.input_values[(self.forced_choice==sub) & (self.binary_outcome==False)])[0]/2
-                self.input_values[(self.forced_choice==sub) & (self.binary_outcome==True)] = self.input_values[(self.forced_choice==sub) & (self.binary_outcome==True)][0] - sub_mn
-                self.input_values[(self.forced_choice==sub) & (self.binary_outcome==False)] = self.input_values[(self.forced_choice==sub) & (self.binary_outcome==False)][0] - sub_mn
+                sub_mn = (self.input_values[(self.forced_choice == sub) & (self.binary_outcome == True)]+self.input_values[(self.forced_choice == sub) & (self.binary_outcome == False)])[0]/2
+                self.input_values[(self.forced_choice == sub) & (self.binary_outcome == True)] = self.input_values[(self.forced_choice == sub) & (self.binary_outcome == True)][0] - sub_mn
+                self.input_values[(self.forced_choice == sub) & (self.binary_outcome == False)] = self.input_values[(self.forced_choice == sub) & (self.binary_outcome == False)][0] - sub_mn
             self.class_thr = 0;
 
         # Calculate true positive and false positive rate
         self.tpr = np.zeros(self.criterion_values.shape)
         self.fpr = np.zeros(self.criterion_values.shape)
-        for i,x in enumerate(self.criterion_values):
+        for i, x in enumerate(self.criterion_values):
             wh = self.input_values >= x
             self.tpr[i] = np.sum(wh[self.binary_outcome])/np.sum(self.binary_outcome)
             self.fpr[i] = np.sum(wh[~self.binary_outcome])/np.sum(~self.binary_outcome)
@@ -131,16 +130,16 @@ def calculate(self, input_values=None, binary_outcome=None,
         if self.forced_choice is None:
             self.threshold_type = threshold_type
             if threshold_type == 'optimal_balanced':
-                mn = (tpr+fpr)/2
+                mn = (tpr + fpr)/2
                 self.class_thr = self.criterion_values[np.argmax(mn)]
             elif threshold_type == 'optimal_overall':
                 n_corr_t = self.tpr*self.n_true
-                n_corr_f = (1-self.fpr)*self.n_false
-                sm = (n_corr_t+n_corr_f)
+                n_corr_f = (1 - self.fpr)*self.n_false
+                sm = (n_corr_t + n_corr_f)
                 self.class_thr = self.criterion_values[np.argmax(sm)]
             elif threshold_type == 'minimum_sdt_bias':
                 # Calculate MacMillan and Creelman 2005 Response Bias (c_bias)
-                c_bias = ( norm.ppf(np.maximum(.0001, np.minimum(0.9999, self.tpr))) + norm.ppf(np.maximum(.0001, np.minimum(0.9999, self.fpr))) ) / float(2)
+                c_bias = (norm.ppf(np.maximum(.0001, np.minimum(0.9999, self.tpr))) + norm.ppf(np.maximum(.0001, np.minimum(0.9999, self.fpr)))) / float(2)
                 self.class_thr = self.criterion_values[np.argmin(abs(c_bias))]
 
         # Calculate output
@@ -161,7 +160,7 @@ def calculate(self, input_values=None, binary_outcome=None,
 
         # Calculate Accuracy
         if balanced_acc:
-            self.accuracy = np.mean([self.sensitivity,self.specificity]) #See Brodersen, Ong, Stephan, Buhmann (2010)
+            self.accuracy = np.mean([self.sensitivity, self.specificity]) #See Brodersen, Ong, Stephan, Buhmann (2010)
         else:
             self.accuracy = 1 - np.mean(self.misclass)
 
@@ -171,7 +170,7 @@ def calculate(self, input_values=None, binary_outcome=None,
         self.accuracy_se = np.sqrt(np.mean(~self.misclass) * (np.mean(
                             ~self.misclass)) / self.n)
 
-    def plot(self, plot_method = 'gaussian'):
+    def plot(self, plot_method = 'gaussian', balanced_acc=False, **kwargs):
         """ Create ROC Plot
 
         Create a specific kind of ROC curve plot, based on input values
@@ -188,38 +187,39 @@
 
         """
 
-        self.calculate() # Calculate ROC parameters
+        self.calculate(balanced_acc=balanced_acc) # Calculate ROC parameters
+
         if plot_method == 'gaussian':
             if self.forced_choice is not None:
                 sub_idx = np.unique(self.forced_choice)
                 diff_scores = []
                 for sub in sub_idx:
-                    diff_scores.append(self.input_values[(self.forced_choice == sub) & (self.binary_outcome==True)][0] - self.input_values[(self.forced_choice==sub) & (self.binary_outcome==False)][0])
+                    diff_scores.append(self.input_values[(self.forced_choice == sub) & (self.binary_outcome == True)][0] - self.input_values[(self.forced_choice == sub) & (self.binary_outcome == False)][0])
                 diff_scores = np.array(diff_scores)
                 mn_diff = np.mean(diff_scores)
                 d = mn_diff / np.std(diff_scores)
                 pooled_sd = np.std(diff_scores) / np.sqrt(2);
                 d_a_model = mn_diff / pooled_sd
 
-                x = np.arange(-3,3,.1)
-                tpr_smooth = 1 - norm.cdf(x, d, 1)
-                fpr_smooth = 1 - norm.cdf(x, -d, 1)
+                x = np.arange(-3, 3, .1)
+                self.tpr_smooth = 1 - norm.cdf(x, d, 1)
+                self.fpr_smooth = 1 - norm.cdf(x, -d, 1)
             else:
                 mn_true = np.mean(self.input_values[self.binary_outcome])
                 mn_false = np.mean(self.input_values[~self.binary_outcome])
                 var_true = np.var(self.input_values[self.binary_outcome])
                 var_false = np.var(self.input_values[~self.binary_outcome])
-                pooled_sd = np.sqrt((var_true*(self.n_true-1))/(self.n_true + self.n_false - 2))
-                d = (mn_true-mn_false)/pooled_sd
+                pooled_sd = np.sqrt((var_true*(self.n_true - 1))/(self.n_true + self.n_false - 2))
+                d = (mn_true - mn_false)/pooled_sd
                 z_true = mn_true/pooled_sd
                 z_false = mn_false/pooled_sd
 
-                x = np.arange(z_false-3,z_true+3,.1)
-                tpr_smooth = 1-(norm.cdf(x, z_true,1))
-                fpr_smooth = 1-(norm.cdf(x, z_false,1))
+                x = np.arange(z_false-3, z_true+3, .1)
+                self.tpr_smooth = 1 - (norm.cdf(x, z_true, 1))
+                self.fpr_smooth = 1 - (norm.cdf(x, z_false, 1))
 
-            fig = roc_plot(fpr_smooth,tpr_smooth)
+            fig = roc_plot(self.fpr_smooth, self.tpr_smooth)
 
         elif plot_method == 'observed':
             fig = roc_plot(self.fpr, self.tpr)
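For reference, the binormal ROC that this patch stores on the object can be reproduced standalone. The following is a minimal sketch of the single-interval branch, assuming only numpy and scipy; `scores` and `labels` are made-up stand-ins for `input_values` and `binary_outcome`, not part of the library:

    import numpy as np
    from scipy.stats import norm

    # Illustrative stand-ins for input_values and binary_outcome
    scores = np.array([0.2, 0.8, 0.5, 1.3, -0.1, 0.9])
    labels = np.array([False, True, False, True, False, True])

    mn_true = scores[labels].mean()
    mn_false = scores[~labels].mean()
    var_true = scores[labels].var()
    n_true, n_false = labels.sum(), (~labels).sum()

    # Pooled SD and effect size d, mirroring the patched branch
    # (note: the patch uses only var_true here, though var_false is computed)
    pooled_sd = np.sqrt((var_true * (n_true - 1)) / (n_true + n_false - 2))
    d = (mn_true - mn_false) / pooled_sd
    z_true, z_false = mn_true / pooled_sd, mn_false / pooled_sd

    # Sweep a criterion over standardized scores to get the smooth curves
    x = np.arange(z_false - 3, z_true + 3, .1)
    tpr_smooth = 1 - norm.cdf(x, z_true, 1)
    fpr_smooth = 1 - norm.cdf(x, z_false, 1)

The design choice here is to model the true and false score distributions as unit-variance Gaussians separated by d, so the smooth curve is fully determined by the class means and the pooled SD rather than by the empirical criterion grid.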
From 4a386db930eb7e44a774b1da296b280d1871b4b3 Mon Sep 17 00:00:00 2001
From: Luke Chang
Date: Sun, 5 Nov 2017 21:53:38 -0800
Subject: [PATCH 2/2] fixed roc forced choice accuracy

Former-commit-id: e93b4926bf9a5079a568ee98313af5ec492f2a38
---
 nltools/analysis.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/nltools/analysis.py b/nltools/analysis.py
index be486462..3baca4c3 100644
--- a/nltools/analysis.py
+++ b/nltools/analysis.py
@@ -80,7 +80,9 @@ def calculate(self, input_values=None, binary_outcome=None,
             forced_choice: index indicating position for each unique subject
                             (default=None)
             balanced_acc: balanced accuracy for single-interval classification
-                            (bool)
+                            (bool). THIS IS NOT COMPLETELY IMPLEMENTED BECAUSE
+                            IT AFFECTS ACCURACY ESTIMATES, BUT NOT P-VALUES OR
+                            THRESHOLD AT WHICH TO EVALUATE SENS/SPEC
             **kwargs: Additional keyword arguments to pass to the prediction
                             algorithm
 
@@ -199,9 +201,16 @@ def plot(self, plot_method = 'gaussian', balanced_acc=False, **kwargs):
                 diff_scores = np.array(diff_scores)
                 mn_diff = np.mean(diff_scores)
                 d = mn_diff / np.std(diff_scores)
-                pooled_sd = np.std(diff_scores) / np.sqrt(2);
+                pooled_sd = np.std(diff_scores) / np.sqrt(2)
                 d_a_model = mn_diff / pooled_sd
 
+                expected_acc = 1 - norm.cdf(0, d, 1)
+                self.sensitivity = expected_acc
+                self.specificity = expected_acc
+                self.ppv = self.sensitivity / (self.sensitivity +
+                                               1 - self.specificity)
+                self.auc = norm.cdf(d_a_model / np.sqrt(2))
+
                 x = np.arange(-3, 3, .1)
                 self.tpr_smooth = 1 - norm.cdf(x, d, 1)
                 self.fpr_smooth = 1 - norm.cdf(x, -d, 1)
@@ -219,6 +228,7 @@ def plot(self, plot_method = 'gaussian', balanced_acc=False, **kwargs):
                 self.tpr_smooth = 1 - (norm.cdf(x, z_true, 1))
                 self.fpr_smooth = 1 - (norm.cdf(x, z_false, 1))
 
+            self.aucn = auc(self.fpr_smooth, self.tpr_smooth)
             fig = roc_plot(self.fpr_smooth, self.tpr_smooth)
 
         elif plot_method == 'observed':
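The forced-choice statistics added in this second patch can likewise be checked in isolation. Below is a minimal sketch under the same Gaussian assumptions; `diff_scores` is made-up illustrative data (the patch computes one such score per subject), and `np.trapz` stands in for whatever `auc` function analysis.py imports, which this excerpt does not show:

    import numpy as np
    from scipy.stats import norm

    # Made-up per-subject difference scores (true minus false condition)
    diff_scores = np.array([0.4, 0.9, 0.1, 0.7])

    d = diff_scores.mean() / diff_scores.std()
    pooled_sd = diff_scores.std() / np.sqrt(2)
    d_a_model = diff_scores.mean() / pooled_sd

    # Expected forced-choice accuracy: P(a difference score exceeds zero)
    expected_acc = 1 - norm.cdf(0, d, 1)      # equals norm.cdf(d)

    # Model-implied AUC from d_a (signal detection theory conversion)
    auc_model = norm.cdf(d_a_model / np.sqrt(2))

    # Numerical AUC of the smoothed curve, analogous to the new `aucn`
    x = np.arange(-3, 3, .1)
    tpr_smooth = 1 - norm.cdf(x, d, 1)
    fpr_smooth = 1 - norm.cdf(x, -d, 1)
    aucn = np.trapz(tpr_smooth[::-1], fpr_smooth[::-1])  # flip so fpr ascends

Note that expected_acc and auc_model coincide under these formulas: for paired difference scores, d_a = sqrt(2) * d, so norm.cdf(d_a / sqrt(2)) reduces to norm.cdf(d), which is exactly 1 - norm.cdf(0, d, 1).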