Skip to content

Commit

Permalink
Revert "Revert #163"
Browse files Browse the repository at this point in the history
  • Loading branch information
Jacobluke- authored Mar 2, 2024
1 parent 24ca6b0 commit c0e694d
Show file tree
Hide file tree
Showing 26 changed files with 711 additions and 173 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Release notes

<!-- do not remove -->

## 2023.03.29

### New Features
- Add a new form of paired proportion plot for better support of repeated measures


## 0.2.3

### Bug Fixes
- Fixes a bug that jammed up when the xvar column was already a pandas Categorical. Now we check for this and act appropriately.
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ contributing](CONTRIBUTING.md), create a new issue using Feature request
template or create a new post in [our Google
Group](https://groups.google.com/g/estimationstats).


## Acknowledgements

We would like to thank alpha testers from the [Claridge-Chang
Expand All @@ -179,7 +178,6 @@ Stanislav Ott.
## Testing

To test DABEST, you need to install

[pytest](https://docs.pytest.org/en/latest) and
[nbdev](https://nbdev.fast.ai/).

Expand Down
10 changes: 9 additions & 1 deletion dabest/_bootstrap_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,15 @@ def __init__(
ttest_single = "NIL"
ttest_2_ind = "NIL"
ttest_2_paired = ttest_rel(x1, x2)[1]
wilcoxonresult = wilcoxon(x1, x2)[1]

try:
wilcoxonresult = wilcoxon(x1, x2)[1]
except ValueError as e:
warnings.warn("Wilcoxon test could not be performed. This might be due "
"to no variability in the difference of the paired groups. \n"
"Error: {}\n"
"For detailed information, please refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html "
.format(e))
mannwhitneyresult = "NIL"

# Turns data into array, then tuple.
Expand Down
80 changes: 62 additions & 18 deletions dabest/_dabest_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from scipy.stats import norm
from scipy.stats import randint


# %% ../nbs/API/dabest_object.ipynb 6
class Dabest(object):

Expand Down Expand Up @@ -58,6 +57,18 @@ def __init__(
self._check_errors(x, y, idx, experiment, experiment_label, x1_level)


# Check if there is NaN under any of the paired settings
if self.__is_paired and self.__output_data.isnull().values.any():
import warnings
warn1 = f"NaN values detected under paired setting and removed,"
warn2 = f" please check your data."
warnings.warn(warn1 + warn2)
if x is not None and y is not None:
rmname = self.__output_data[self.__output_data[y].isnull()][self.__id_col].tolist()
self.__output_data = self.__output_data[~self.__output_data[self.__id_col].isin(rmname)]
elif x is None and y is None:
self.__output_data.dropna(inplace=True)

# create new x & idx and record the second variable if this is a valid 2x2 ANOVA case
if idx is None and x is not None and y is not None:
# Add a length check for unique values in the first element in list x,
Expand Down Expand Up @@ -442,26 +453,47 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
raise ValueError(err0)

# Check if the columns stated are valid
# TODO instead of traversing twice idx you can traverse only once
# and break the loop if the condition is not satisfied?
# TODO What if the type is not str and not tuple,list? missing raise Error
if all([isinstance(i, str) for i in idx]):
if len(pd.unique([t for t in idx]).tolist()) != 2:
# Initialize a flag to track if any element in idx is neither str nor (tuple, list)
valid_types = True

# Initialize variables to track the conditions for str and (tuple, list)
is_str_condition_met, is_tuple_list_condition_met = False, False

# Single traversal for optimization
for item in idx:
if isinstance(item, str):
is_str_condition_met = True
elif isinstance(item, (tuple, list)) and len(item) == 2:
is_tuple_list_condition_met = True
else:
valid_types = False
break # Exit the loop if an invalid type is found

# Check if all types are valid
if not valid_types:
err0 = "`mini_meta` is True, but `idx` ({})".format(idx)
err1 = "does not contain exactly 2 unique columns."
raise ValueError(err0 + err1)

# Handling str type condition
if is_str_condition_met:
if len(pd.unique(idx).tolist()) != 2:
err0 = "`mini_meta` is True, but `idx` ({})".format(idx)
err1 = "does not contain exactly 2 columns."
err1 = "does not contain exactly 2 unique columns."
raise ValueError(err0 + err1)

if all([isinstance(i, (tuple, list)) for i in idx]):
# Handling (tuple, list) type condition
if is_tuple_list_condition_met:
all_idx_lengths = [len(t) for t in idx]
if (array(all_idx_lengths) != 2).any():
err1 = "`mini_meta` is True, but some idx "
err2 = "in {} does not consist only of two groups.".format(idx)
err1 = "`mini_meta` is True, but some elements in idx "
err2 = "in {} do not consist only of two groups.".format(idx)
raise ValueError(err1 + err2)

# TODO can you have True mini_meta and delta2 at the same time?

# Check if this is a 2x2 ANOVA case and x & y are valid columns
# Create experiment_label and x1_level
if self.__delta2:
elif self.__delta2:
if x is None:
error_msg = "If `delta2` is True. `x` parameter cannot be None. String or list expected"
raise ValueError(error_msg)
Expand Down Expand Up @@ -534,7 +566,6 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
else:
x1_level = self.__output_data[x[0]].unique()

# TODO what if experiment is None?
elif experiment:
experiment_label = self.__output_data[experiment].unique()
x1_level = self.__output_data[x[0]].unique()
Expand All @@ -545,7 +576,16 @@ def _get_plot_data(self, x, y, all_plot_groups):
"""
Function to prepare some attributes for plotting
"""

# Check if there is NaN under any of the paired settings
if self.__is_paired is not None and self.__output_data.isnull().values.any():
print("Nan")
import warnings
warn1 = f"NaN values detected under paired setting and removed,"
warn2 = f" please check your data."
warnings.warn(warn1 + warn2)
rmname = self.__output_data[self.__output_data[y].isnull()][self.__id_col].tolist()
self.__output_data = self.__output_data[~self.__output_data[self.__id_col].isin(rmname)]

# Identify the type of data that was passed in.
if x is not None and y is not None:
# Assume we have a long dataset.
Expand Down Expand Up @@ -589,6 +629,13 @@ def _get_plot_data(self, x, y, all_plot_groups):
self.__xvar = "group"
self.__yvar = "value"

# Check if there is NaN under any of the paired settings
if self.__is_paired is not None and self.__output_data.isnull().values.any():
import warnings
warn1 = f"NaN values detected under paired setting and removed,"
warn2 = f" please check your data."
warnings.warn(warn1 + warn2)

# First, check we have all columns in the dataset.
for g in all_plot_groups:
if g not in self.__output_data.columns:
Expand All @@ -611,10 +658,7 @@ def _get_plot_data(self, x, y, all_plot_groups):
# Added in v0.2.7.
plot_data.dropna(axis=0, how="any", subset=[self.__yvar], inplace=True)

# TODO these comments should not be in the code but on the release notes of the package version
# Lines 131 to 140 added in v0.2.3.
# Fixes a bug that jammed up when the xvar column was already
# a pandas Categorical. Now we check for this and act appropriately.

if isinstance(plot_data[self.__xvar].dtype, pd.CategoricalDtype):
plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)
plot_data[self.__xvar].cat.reorder_categories(
Expand Down
2 changes: 1 addition & 1 deletion dabest/_delta_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,7 @@ def __repr__(self, header=True, sigfig=3):
bs = bs1 + bs2

pval_def1 = "Any p-value reported is the probability of observing the" + \
"effect size (or greater),\nassuming the null hypothesis of" + \
"effect size (or greater),\nassuming the null hypothesis of " + \
"zero difference is true."
pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \
"control and test labels were performed."
Expand Down
60 changes: 28 additions & 32 deletions dabest/_effsize_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3):

pval_def1 = (
"Any p-value reported is the probability of observing the"
+ "effect size (or greater),\nassuming the null hypothesis of"
+ "effect size (or greater),\nassuming the null hypothesis of "
+ "zero difference is true."
)
pval_def2 = (
Expand Down Expand Up @@ -299,7 +299,6 @@ def _compute_bca_intervals(self, sorted_bootstraps):
)

else:
# TODO improve error handling, separate file with error messages?
err1 = "The $lim_type limit of the BCa interval cannot be computed."
err2 = "It is set to the effect size itself."
err3 = "All bootstrap values were likely all the same."
Expand Down Expand Up @@ -330,9 +329,16 @@ def _perform_statistical_test(self):

if self.__is_paired and not self.__proportional:
# Wilcoxon, a non-parametric version of the paired T-test.
wilcoxon = spstats.wilcoxon(self.__control, self.__test)
self.__pvalue_wilcoxon = wilcoxon.pvalue
self.__statistic_wilcoxon = wilcoxon.statistic
try:
wilcoxon = spstats.wilcoxon(self.__control, self.__test)
self.__pvalue_wilcoxon = wilcoxon.pvalue
self.__statistic_wilcoxon = wilcoxon.statistic
except ValueError as e:
warnings.warn("Wilcoxon test could not be performed. This might be due "
"to no variability in the difference of the paired groups. \n"
"Error: {}\n"
"For detailed information, please refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html "
.format(e))

if self.__effect_size != "median_diff":
# Paired Student's t-test.
Expand All @@ -357,6 +363,16 @@ def _perform_statistical_test(self):
self.__pvalue_mcnemar = _mcnemar.pvalue
self.__statistic_mcnemar = _mcnemar.statistic

elif self.__proportional:
# The Cohen's h calculation is for binary categorical data
try:
self.__proportional_difference = es.cohens_h(
self.__control, self.__test
)
except ValueError as e:
warnings.warn(f"Calculation of Cohen's h failed. This method is applicable "
f"only for binary data (0's and 1's). Details: {e}")

elif self.__effect_size == "cliffs_delta":
# Let's go with Brunner-Munzel!
brunner_munzel = spstats.brunnermunzel(
Expand Down Expand Up @@ -398,23 +414,13 @@ def _perform_statistical_test(self):
)
self.__pvalue_mann_whitney = mann_whitney.pvalue
self.__statistic_mann_whitney = mann_whitney.statistic
except ValueError:
# TODO At least print some warning?
# Occurs when the control and test are exactly identical
# in terms of rank (eg. all zeros.)
pass
except ValueError as e:
warnings.warn("Mann-Whitney test could not be performed. This might be due "
"to identical rank values in both control and test groups. "
"Details: {}".format(e))

standardized_es = es.cohens_d(self.__control, self.__test, is_paired=None)

# The Cohen's h calculation is for binary categorical data
try:
self.__proportional_difference = es.cohens_h(
self.__control, self.__test
)
except ValueError:
# TODO At least print some warning?
# Occur only when the data consists not only 0's and 1's.
pass

def to_dict(self):
"""
Expand Down Expand Up @@ -567,87 +573,79 @@ def statistic_mcnemar(self):

@property
def pvalue_paired_students_t(self):
# TODO Missing docstring
try:
return self.__pvalue_paired_students_t
except AttributeError:
return npnan

@property
def statistic_paired_students_t(self):
# TODO Missing docstring
try:
return self.__statistic_paired_students_t
except AttributeError:
return npnan

@property
def pvalue_kruskal(self):
# TODO Missing docstring
try:
return self.__pvalue_kruskal
except AttributeError:
return npnan

@property
def statistic_kruskal(self):
# TODO Missing docstring
try:
return self.__statistic_kruskal
except AttributeError:
return npnan

@property
def pvalue_welch(self):
# TODO Missing docstring
try:
return self.__pvalue_welch
except AttributeError:
return npnan

@property
def statistic_welch(self):
# TODO Missing docstring
try:
return self.__statistic_welch
except AttributeError:
return npnan

@property
def pvalue_students_t(self):
# TODO Missing docstring
try:
return self.__pvalue_students_t
except AttributeError:
return npnan

@property
def statistic_students_t(self):
# TODO Missing docstring
try:
return self.__statistic_students_t
except AttributeError:
return npnan

@property
def pvalue_mann_whitney(self):
# TODO Missing docstring
try:
return self.__pvalue_mann_whitney
except AttributeError:
return npnan

@property
def statistic_mann_whitney(self):
# TODO Missing docstring
try:
return self.__statistic_mann_whitney
except AttributeError:
return npnan

@property
def pvalue_permutation(self):
# TODO Missing docstring
"""
p value of permutation test
"""
return self.__PermutationTest_result.pvalue

@property
Expand All @@ -663,12 +661,10 @@ def permutations(self):

@property
def permutations_var(self):
# TODO Missing docstring
return self.__PermutationTest_result.permutations_var

@property
def proportional_difference(self):
# TODO Missing docstring
try:
return self.__proportional_difference
except AttributeError:
Expand Down
4 changes: 3 additions & 1 deletion dabest/_stats_tools/confint_1group.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ def compute_1group_jackknife(x, func, *args, **kwargs):


def compute_1group_acceleration(jack_dist):
# TODO is it needed a function to just call one line?
"""
Returns the acceleration value based on the jackknife distribution.
"""
from . import confint_2group_diff as ci_2g

return ci_2g._calc_accel(jack_dist)
Expand Down
Loading

0 comments on commit c0e694d

Please sign in to comment.