From c56859daf6a7fc05f1744a6d0845d7c16c0b288c Mon Sep 17 00:00:00 2001
From: Peter Hedenskog
Date: Thu, 7 Dec 2023 10:20:46 -0800
Subject: [PATCH] Make it easier to understand alternatives (#4023)

* Makes it easier to understand alternative settings

* make it easier to understand tests

* better sync
---
 lib/cli/cli.js                     |  4 ++--
 lib/plugins/compare/index.js       |  4 ++--
 lib/plugins/compare/pug/index.pug  |  6 ++++++
 lib/plugins/compare/statistical.py | 10 +++++-----
 4 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/lib/cli/cli.js b/lib/cli/cli.js
index 681e0a7388..e589d85989 100644
--- a/lib/cli/cli.js
+++ b/lib/cli/cli.js
@@ -1899,9 +1899,9 @@ export async function parseCommandLine() {
       })
       .option('compare.alternative', {
         choices: ['less', ' greater', 'two-sided'],
-        default: 'less',
+        default: 'greater',
         describe:
-          'Specifies the alternative hypothesis to be tested. Options are less for one-sided test where the first group is expected to be less than the second, greater for one-sided test with the first group expected to be greater, or two-sided for a two-sided test.',
+          'Specifies the alternative hypothesis to be tested. The default greater tests whether the current data is greater than the baseline, two-sided tests for a difference in either direction, and less tests whether the current data is less than the baseline.',
         group: 'compare'
       })
       .option('compare.wilcoxon.correction', {
diff --git a/lib/plugins/compare/index.js b/lib/plugins/compare/index.js
index b18e62be57..bf27fb9bd8 100644
--- a/lib/plugins/compare/index.js
+++ b/lib/plugins/compare/index.js
@@ -143,8 +143,8 @@ export default class ComparePlugin extends SitespeedioPlugin {
             baselineMetric.getValues()
           );
           metricsInputData.metrics[group][metricName] = {
-            sample1: baselineStats.data,
-            sample2: currentStats.data
+            baseline: baselineStats.data,
+            current: currentStats.data
           };
         } else {
           log.info(
diff --git a/lib/plugins/compare/pug/index.pug b/lib/plugins/compare/pug/index.pug
index cc9ccf9b2b..a94cee03bd 100644
--- a/lib/plugins/compare/pug/index.pug
+++ b/lib/plugins/compare/pug/index.pug
@@ -17,6 +17,12 @@ p
   h2 Settings
   p
     | The test conducted in this comparison is the #{compare.meta.testOptions.testType} test. The alternative hypothesis used for this test is "#{compare.meta.testOptions.alternative}".
+    if compare.meta.testOptions.alternative === 'less'
+      | This means that we test whether the current test is significantly lower than the baseline tests.
+    else if compare.meta.testOptions.alternative === 'greater'
+      | This means that we test whether the current test is significantly higher than the baseline tests.
+    else if compare.meta.testOptions.alternative === 'two-sided'
+      | This means that we test the baseline vs the current tests both ways, checking for a change in either direction.
     if compare.meta.testOptions.testType === 'mannwhitneyu'
       | For more information on the settings of the Mann-Whitney U test, please refer to the
       a(href='https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mannwhitneyu.html') official documentation.
diff --git a/lib/plugins/compare/statistical.py b/lib/plugins/compare/statistical.py
index 97a53fd4fd..29ca1ce6d3 100644
--- a/lib/plugins/compare/statistical.py
+++ b/lib/plugins/compare/statistical.py
@@ -6,15 +6,15 @@ def has_variability(sample):
     """Check if the sample has more than one unique value."""
     return len(set(sample)) > 1
 
-def perform_test(test_type, sample1, sample2, **kwargs):
+def perform_test(test_type, baseline, current, **kwargs):
     """Perform the statistical test based on the test type."""
-    if not has_variability(sample1) or not has_variability(sample2):
+    if not has_variability(baseline) or not has_variability(current):
         return None, "No variability"
 
     if test_type == 'wilcoxon':
-        return wilcoxon(sample1, sample2, **kwargs)
+        return wilcoxon(current, baseline, **kwargs)
     elif test_type == 'mannwhitneyu':
-        return mannwhitneyu(sample1, sample2, **kwargs)
+        return mannwhitneyu(current, baseline, **kwargs)
     else:
         raise ValueError("Invalid test type. Choose 'wilcoxon' or 'mannwhitneyu'.")
 
@@ -27,7 +27,7 @@ def perform_test(test_type, sample1, sample2, **kwargs):
     for group_name, metrics in input_data['metrics'].items():
         group_results = {}
         for metric_name, metric_data in metrics.items():
-            stat, p = perform_test(test_type, metric_data['sample1'], metric_data['sample2'], **options)
+            stat, p = perform_test(test_type, metric_data['baseline'], metric_data['current'], **options)
             if p == "No variability":
                 group_results[metric_name] = {'statistic': "N/A", 'p-value': "N/A"}
             else:
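
To make the new default concrete, here is a minimal, hypothetical sketch (not part of the patch; the sample values and the 0.05 cut-off are invented for illustration) of calling scipy directly with the same current-first, baseline-second argument order that perform_test uses after this change, so that alternative='greater' asks whether the current run is larger than the baseline:

# Illustrative sketch only: the data and the 0.05 threshold below are invented, not from the patch.
from scipy.stats import mannwhitneyu

baseline = [812, 798, 805, 821, 809, 815, 801, 818]  # hypothetical baseline timings (ms)
current = [843, 851, 839, 860, 848, 855, 841, 858]   # hypothetical current timings (ms)

# Current first, baseline second, matching the order perform_test passes to scipy:
# alternative='greater' tests whether the current values are larger (slower) than the baseline.
stat, p = mannwhitneyu(current, baseline, alternative='greater')
print(f"statistic={stat}, p-value={p:.4f}")
if p < 0.05:  # 0.05 is just an example cut-off
    print("Current is significantly greater than the baseline (possible regression).")
else:
    print("No significant increase detected.")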