From 695668a9d72e782b7062f67b3efaa1406e9acf0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boris=20Cl=C3=A9net?= <117362283+bclenet@users.noreply.github.com> Date: Tue, 16 Apr 2024 16:45:37 +0200 Subject: [PATCH] Adding participants exclusions in narps_open_runner (#194) * Adding a command line tool showing the correlation results of a pipeline execution * [DOC] install doc about correlation command line tool [skip ci] * Modifications on runner * Correlation main + exclusions in runner --- INSTALL.md | 6 +++ narps_open/runner.py | 16 +++++- .../__init__.py} | 0 narps_open/utils/correlation/__main__.py | 53 +++++++++++++++++++ setup.py | 1 + tests/conftest.py | 18 +++---- 6 files changed, 84 insertions(+), 10 deletions(-) rename narps_open/utils/{correlation.py => correlation/__init__.py} (100%) create mode 100644 narps_open/utils/correlation/__main__.py diff --git a/INSTALL.md b/INSTALL.md index e9f124ba..28936287 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -95,6 +95,7 @@ Finally, you are able to use the scripts of the project : * `narps_open_runner`: run pipelines * `narps_open_tester`: run a pipeline and test its results against original ones from the team +* `narps_open_correlations`: compute and display correlation between results and original ones from the team * `narps_description`: get the textual description made by a team * `narps_results`: download the original results from teams * `narps_open_status`: get status information about the development process of the pipelines @@ -107,6 +108,10 @@ narps_open_runner -t 2T6S -n 40 # and produces a report with correlation values. 
narps_open_tester -t 08MQ +# Compute the correlation values between results of 2T6S reproduction on 60 subjects with original ones +# WARNING : 2T6S must have been previously computed with a group of 60 subjects +narps_open_correlations -t 2T6S -n 60 + # Get the description of team C88N in markdown formatting narps_description -t C88N --md @@ -121,6 +126,7 @@ narps_open_status --json > For further information about these command line tools, read the corresponding documentation pages. > * `narps_open_runner` : [docs/running.md](docs/running.md) > * `narps_open_tester` : [docs/testing.md](docs/testing.md#command-line-tool) +> * `narps_open_correlations` : [docs/correlation.md](docs/correlation.md#command-line-tool) > * `narps_description` : [docs/description.md](docs/description.md) > * `narps_results` : [docs/data.md](docs/data.md#results-from-narps-teams) > * `narps_open_status` : [docs/status.md](docs/status.md) diff --git a/narps_open/runner.py b/narps_open/runner.py index bf557ba0..597d1144 100644 --- a/narps_open/runner.py +++ b/narps_open/runner.py @@ -178,8 +178,15 @@ def main(): help='run the first levels only (preprocessing + subjects + runs)') parser.add_argument('-c', '--check', action='store_true', required=False, help='check pipeline outputs (runner is not launched)') + parser.add_argument('-e', '--exclusions', action='store_true', required=False, + help='run the analyses without the excluded subjects') arguments = parser.parse_args() + # Check arguments + if arguments.exclusions and not arguments.nsubjects: + print('Argument -e/--exclusions only works with -n/--nsubjects') + return + # Initialize a PipelineRunner runner = PipelineRunner(team_id = arguments.team) runner.pipeline.directories.dataset_dir = Configuration()['directories']['dataset'] @@ -193,7 +200,14 @@ def main(): elif arguments.rsubjects is not None: runner.random_nb_subjects = int(arguments.rsubjects) else: - runner.nb_subjects = int(arguments.nsubjects) + if arguments.exclusions: + # 
Intersection between the requested subset and the list of not excluded subjects + runner.subjects = list( + set(get_participants_subset(int(arguments.nsubjects))) + & set(get_participants(arguments.team)) + ) + else: + runner.nb_subjects = int(arguments.nsubjects) # Check data if arguments.check: diff --git a/narps_open/utils/correlation.py b/narps_open/utils/correlation/__init__.py similarity index 100% rename from narps_open/utils/correlation.py rename to narps_open/utils/correlation/__init__.py diff --git a/narps_open/utils/correlation/__main__.py b/narps_open/utils/correlation/__main__.py new file mode 100644 index 00000000..d086499b --- /dev/null +++ b/narps_open/utils/correlation/__main__.py @@ -0,0 +1,53 @@ +#!/usr/bin/python +# coding: utf-8 + +""" A command line tool for the narps_open.utils.correlation module """ + +from os.path import join +from argparse import ArgumentParser + +from narps_open.data.results import ResultsCollection +from narps_open.utils.configuration import Configuration +from narps_open.utils.correlation import get_correlation_coefficient +from narps_open.pipelines import get_implemented_pipelines +from narps_open.runner import PipelineRunner + +def main(): + """ Entry-point for the command line tool narps_open_correlations """ + + # Parse arguments + parser = ArgumentParser(description = 'Compare reproduced files to original results.') + parser.add_argument('-t', '--team', type = str, required = True, + help = 'the team ID', choices = get_implemented_pipelines()) + parser.add_argument('-n', '--nsubjects', type=str, required = True, + help='the number of subjects to be selected') + arguments = parser.parse_args() + + # Initialize pipeline + runner = PipelineRunner(arguments.team) + runner.pipeline.directories.dataset_dir = Configuration()['directories']['dataset'] + runner.pipeline.directories.results_dir = Configuration()['directories']['reproduced_results'] + runner.pipeline.directories.set_output_dir_with_team_id(arguments.team) + 
runner.pipeline.directories.set_working_dir_with_team_id(arguments.team) + runner.nb_subjects = arguments.nsubjects + + # Indices and keys to the unthresholded maps + indices = list(range(1, 18, 2)) + + # Retrieve the paths to the reproduced files + reproduced_files = runner.pipeline.get_hypotheses_outputs() + reproduced_files = [reproduced_files[i] for i in indices] + + # Retrieve the paths to the results files + collection = ResultsCollection(arguments.team) + file_keys = [f'hypo{h}_unthresh.nii.gz' for h in range(1,10)] + results_files = [join(collection.directory, k) for k in file_keys] + + # Compute the correlation coefficients + print([ + get_correlation_coefficient(reproduced_file, results_file) + for reproduced_file, results_file in zip(reproduced_files, results_files) + ]) + +if __name__ == '__main__': + main() diff --git a/setup.py b/setup.py index b17409b6..e3c65bb0 100644 --- a/setup.py +++ b/setup.py @@ -71,6 +71,7 @@ 'narps_open_runner = narps_open.runner:main', 'narps_open_tester = narps_open.tester:main', 'narps_open_status = narps_open.utils.status:main', + 'narps_open_correlations = narps_open.utils.correlation.__main__:main', 'narps_description = narps_open.data.description.__main__:main', 'narps_results = narps_open.data.results.__main__:main' ] diff --git a/tests/conftest.py b/tests/conftest.py index f12f77a0..3e5570ff 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,6 +22,7 @@ from narps_open.utils.correlation import get_correlation_coefficient from narps_open.utils.configuration import Configuration from narps_open.data.results import ResultsCollection +from narps_open.data.participants import get_participants_subset # Init configuration, to ensure it is in testing mode Configuration(config_type='testing') @@ -88,13 +89,12 @@ def test_pipeline_execution( TODO : how to keep intermediate files of the low level for the next numbers of subjects ? 
- keep intermediate levels : boolean in PipelineRunner """ - # A list of number of subject to iterate over - nb_subjects_list = list(range( - Configuration()['testing']['pipelines']['nb_subjects_per_group'], - nb_subjects, - Configuration()['testing']['pipelines']['nb_subjects_per_group']) - ) - nb_subjects_list.append(nb_subjects) + # Create subdivisions of the requested subject list + nb_subjects_per_group = Configuration()['testing']['pipelines']['nb_subjects_per_group'] + all_subjects = get_participants_subset(nb_subjects) + subjects_lists = [] + for index in range(0, len(all_subjects), nb_subjects_per_group): + subjects_lists.append(all_subjects[index:index+nb_subjects_per_group]) # Initialize the pipeline runner = PipelineRunner(team_id) @@ -104,8 +104,8 @@ def test_pipeline_execution( runner.pipeline.directories.set_working_dir_with_team_id(team_id) # Run first level by (small) sub-groups of subjects - for subjects in nb_subjects_list: - runner.nb_subjects = subjects + for subjects_list in subjects_lists: + runner.subjects = subjects_list # Run as long as there are missing files after first level (with a max number of trials) # TODO : this is a workaround