From 27c21bf26b1ab2c6a36cce1ce96764ec457df164 Mon Sep 17 00:00:00 2001 From: leej3 Date: Tue, 11 May 2021 13:18:14 +0100 Subject: [PATCH 01/37] add initial prototype of design aggregation tool --- bids/__init__.py | 3 +- bids/statsmodels_design_synthesizer.py | 148 ++++++++++++++++++ .../test_statsmodels-design-synthesizer.py | 52 ++++++ setup.cfg | 1 + 4 files changed, 203 insertions(+), 1 deletion(-) create mode 100755 bids/statsmodels_design_synthesizer.py create mode 100644 bids/tests/test_statsmodels-design-synthesizer.py diff --git a/bids/__init__.py b/bids/__init__.py index f8e285c1f..3d38ffc26 100644 --- a/bids/__init__.py +++ b/bids/__init__.py @@ -13,7 +13,8 @@ "layout", "reports", "utils", - "variables" + "variables", + "statsmodels_design_synthesizer", ] due.cite(Doi("10.1038/sdata.2016.44"), diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py new file mode 100755 index 000000000..a85156f30 --- /dev/null +++ b/bids/statsmodels_design_synthesizer.py @@ -0,0 +1,148 @@ +#! /usr/bin/env python +import argparse +import sys +import json +from pathlib import Path +import pandas as pd +import numpy as np +from collections import namedtuple +from bids.modeling import transformations +from bids.utils import convert_JSON +from bids.variables import BIDSRunVariableCollection, SparseRunVariable +from bids.layout.utils import parse_file_entities + + +def statsmodels_design_synthesizer(params): + """Console script for bids statsmodels_design_synthesizer.""" + + # Output: + if not params.get("OUTPUT_DIR"): + output_tsv = params.get("OUTPUT_TSV", "aggregated_statsmodels_design.tsv") + + # Sampling rate of output + sampling_rate_out = params.get("OUTPUT_SAMPLING_RATE") + + # Process transformations file + # TODO: add transforms functionality, for now only model.json is handled + # TODO: some basic error checking to confirm the correct level of + # transformations has been obtained. This will most likely be the case since + # transformations at higher levels will no longer be required when the new + # "flow" approach is used. + transforms_file = Path(params["TRANSFORMS"]) + if not transforms_file.exists(): + raise ValueError(f"Cannot find {transforms_file}") + model = convert_JSON(json.loads(model_file.read_text())) + model_transforms = model["steps"][0]["transformations"] + + # Get relevant collection + coll_df = pd.read_csv(params["EVENTS_TSV"], delimiter="\t") + RunInfo = namedtuple("RunInfo", ["entities", "duration"]) + run_info = RunInfo(parse_file_entities(params["EVENTS_TSV"]), params["DURATION"]) + coll = get_events_collection(coll_df, [run_info]) + + # perform transformations + colls = transformations.TransformerManager().transform(coll, model_transforms) + + # Save colls + df_out = colls.to_df(sampling_rate=sampling_rate_out) + df_out.to_csv(output_tsv, index=None, sep="\t", na_rep="n/a") + + +def get_events_collection(_data, run_info, drop_na=True): + """ " + This is an attempt to minimally implement: + https://github.com/bids-standard/pybids/blob/statsmodels/bids/variables/io.py + """ + colls_output = [] + if "amplitude" in _data.columns: + if ( + _data["amplitude"].astype(int) == 1 + ).all() and "trial_type" in _data.columns: + msg = ( + "Column 'amplitude' with constant value 1 " + "is unnecessary in event files; ignoring it." + ) + _data = _data.drop("amplitude", axis=1) + else: + msg = "Column name 'amplitude' is reserved; " "renaming it to 'amplitude_'." + _data = _data.rename(columns={"amplitude": "amplitude_"}) + warnings.warn(msg) + + _data = _data.replace("n/a", np.nan) # Replace BIDS' n/a + _data = _data.apply(pd.to_numeric, errors="ignore") + + _cols = list(set(_data.columns.tolist()) - {"onset", "duration"}) + + # Construct a DataFrame for each extra column + for col in _cols: + df = _data[["onset", "duration"]].copy() + df["amplitude"] = _data[col].values + + # Add in all of the run's entities as new columns for + # index + # for entity, value in entities.items(): + # if entity in ALL_ENTITIES: + # df[entity] = value + # + if drop_na: + df = df.dropna(subset=["amplitude"]) + + if df.empty: + continue + var = SparseRunVariable(name=col, data=df, run_info=run_info, source="events") + colls_output.append(var) + + output = BIDSRunVariableCollection(colls_output) + return output + + +def create_parser(): + """Returns argument parser""" + p = argparse.ArgumentParser() + p.add_argument("--events-tsv", required=True, help="Path to events TSV") + p.add_argument( + "--transforms", required=True, help="Path to transform or model json" + ) + p.add_argument( + "--output-sampling-rate", + required=False, + help="Output sampling rate in Hz when output is dense instead of sparse", + ) + + pout = p.add_mutually_exclusive_group() + pout.add_argument( + "--output-tsv", + nargs="?", + help="Path to TSV containing a fully constructed design matrix.", + ) + pout.add_argument( + "--output-dir", + nargs="?", + help="Path to directory to write processed event files.", + ) + + ptimes = p.add_argument_group( + "Specify some essential details about the time series." + ) + ptimes.add_argument( + "--nvol", required=True, help="Number of volumes in func time-series" + ) + ptimes.add_argument("--tr", required=True, help="TR for func time series") + ptimes.add_argument("--ta", required=True, help="TA for events") + + return p + + +def main(user_args=None): + parser = create_parser() + if user_args is None: + namespace = parser.parse_args(sys.argv[1:]) + else: + namespace = parser.parse_args(user_args) + params = vars(namespace) + + statsmodels_design_synthesizer(params) + + +if __name__ == "__main__": + sys.exit(main()) # pragma: no cover""Main module.""" diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py new file mode 100644 index 000000000..30404bab7 --- /dev/null +++ b/bids/tests/test_statsmodels-design-synthesizer.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python + +"""Tests for `bids_statsmodels_design_synthesizer` package.""" + +import pytest +import subprocess as sp +from pathlib import Path + +SYNTHESIZER = "statsmodels-design-synthesizer" +from bids.statsmodels_design_synthesizer import statsmodels_design_synthesizer as synth_mod + +# from bids_statsmodels_design_synthesizer import Path(SYNTHESIZER).stem as synth_mod +EXAMPLE_USER_ARGS = { + "OUTPUT_TSV": "aggregated_design.tsv", + "TRANSFORMS": "data/ds005/models/ds-005_type-mfx_model.json", + "EVENTS_TSV": "data/ds005/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv", + "TR": 2, + "TA": 2, + "NVOLS": 160, + } + + +def test_cli_help(): + with pytest.raises(sp.CalledProcessError): + output = sp.check_output([SYNTHESIZER, "-h"]) + with pytest.raises(sp.CalledProcessError): + output = sp.check_output([SYNTHESIZER, "--non-existent"]) + + +def test_design_aggregation_function(): + synth_mod.main(EXAMPLE_USER_ARGS) + + +def test_minimal_cli_functionality(): + """ + We roughly want to implement the equivalent of the following: + from bids.analysis import Analysis + from bids.layout import BIDSLayout + + layout = BIDSLayout("data/ds000003") + analysis = Analysis(model="data/ds000003/models/model-001_smdl.json",layout=layout) + analysis.setup() + + more specifically we want to reimplement this line + https://github.com/bids-standard/pybids/blob/b6cd0f6787230ce976a374fbd5fce650865752a3/bids/analysis/analysis.py#L282 + """ + bids_dir = Path(__file__).parent / "data/ds000003" + model = "model-001_smdl.json" + arg_list = " " .join([f"""--{k.lower().replace("_","-")}={v}""" for k,v in EXAMPLE_USER_ARGS.items()]) + cmd = f"{SYNTHESIZER} {arg_list}" + output = sp.check_output(cmd.split()) + diff --git a/setup.cfg b/setup.cfg index 2177dc9b9..e2aadbd4c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -65,6 +65,7 @@ dev = [options.entry_points] console_scripts = pybids=bids.cli:cli + statsmodels-design-synthesizer=bids.statsmodels_design_synthesizer:main [versioneer] VCS = git From bc6cd9116be928baca4de356217db356c85ff1e5 Mon Sep 17 00:00:00 2001 From: leej3 Date: Tue, 11 May 2021 14:50:34 +0100 Subject: [PATCH 02/37] tidy arg parsing --- bids/statsmodels_design_synthesizer.py | 39 ++++++++++++------- .../test_statsmodels-design-synthesizer.py | 19 ++++----- 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index a85156f30..c93b71943 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -16,11 +16,11 @@ def statsmodels_design_synthesizer(params): """Console script for bids statsmodels_design_synthesizer.""" # Output: - if not params.get("OUTPUT_DIR"): - output_tsv = params.get("OUTPUT_TSV", "aggregated_statsmodels_design.tsv") + if not params.get("output_dir"): + output_tsv = params.get("output_tsv", "aggregated_statsmodels_design.tsv") # Sampling rate of output - sampling_rate_out = params.get("OUTPUT_SAMPLING_RATE") + sampling_rate_out = params.get("output_sampling_rate") # Process transformations file # TODO: add transforms functionality, for now only model.json is handled @@ -28,16 +28,26 @@ def statsmodels_design_synthesizer(params): # transformations has been obtained. This will most likely be the case since # transformations at higher levels will no longer be required when the new # "flow" approach is used. - transforms_file = Path(params["TRANSFORMS"]) + transforms_file = Path(params["transforms"]) if not transforms_file.exists(): raise ValueError(f"Cannot find {transforms_file}") - model = convert_JSON(json.loads(model_file.read_text())) - model_transforms = model["steps"][0]["transformations"] + model = convert_JSON(json.loads(transforms_file.read_text())) + + if "nodes" in model: + nodes_key = "nodes" + elif "steps" in model: + nodes_key = "steps" + else: + raise ValueError("Cannot find a key for nodes in the model file") + model_transforms = model[nodes_key][0]["transformations"] + + duration = params["nvol"] * params["tr"] # Get relevant collection - coll_df = pd.read_csv(params["EVENTS_TSV"], delimiter="\t") + coll_df = pd.read_csv(params["events_tsv"], delimiter="\t") RunInfo = namedtuple("RunInfo", ["entities", "duration"]) - run_info = RunInfo(parse_file_entities(params["EVENTS_TSV"]), params["DURATION"]) + + run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration) coll = get_events_collection(coll_df, [run_info]) # perform transformations @@ -106,6 +116,7 @@ def create_parser(): p.add_argument( "--output-sampling-rate", required=False, + type=float, help="Output sampling rate in Hz when output is dense instead of sparse", ) @@ -125,10 +136,12 @@ def create_parser(): "Specify some essential details about the time series." ) ptimes.add_argument( - "--nvol", required=True, help="Number of volumes in func time-series" + "--nvol", required=True, type=int, help="Number of volumes in func time-series" + ) + ptimes.add_argument( + "--tr", required=True, type=float, help="TR for func time series" ) - ptimes.add_argument("--tr", required=True, help="TR for func time series") - ptimes.add_argument("--ta", required=True, help="TA for events") + ptimes.add_argument("--ta", required=True, type=float, help="TA for events") return p @@ -137,9 +150,9 @@ def main(user_args=None): parser = create_parser() if user_args is None: namespace = parser.parse_args(sys.argv[1:]) + params = vars(namespace) else: - namespace = parser.parse_args(user_args) - params = vars(namespace) + params = user_args statsmodels_design_synthesizer(params) diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py index 30404bab7..b1e794334 100644 --- a/bids/tests/test_statsmodels-design-synthesizer.py +++ b/bids/tests/test_statsmodels-design-synthesizer.py @@ -7,22 +7,21 @@ from pathlib import Path SYNTHESIZER = "statsmodels-design-synthesizer" -from bids.statsmodels_design_synthesizer import statsmodels_design_synthesizer as synth_mod +from bids import statsmodels_design_synthesizer as synth_mod # from bids_statsmodels_design_synthesizer import Path(SYNTHESIZER).stem as synth_mod EXAMPLE_USER_ARGS = { - "OUTPUT_TSV": "aggregated_design.tsv", - "TRANSFORMS": "data/ds005/models/ds-005_type-mfx_model.json", - "EVENTS_TSV": "data/ds005/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv", - "TR": 2, - "TA": 2, - "NVOLS": 160, + "output_tsv": "aggregated_design.tsv", + "transforms": "data/ds005/models/ds-005_type-mfx_model.json", + "events_tsv": "data/ds005/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv", + "tr": 2, + "ta": 2, + "nvol": 160, } def test_cli_help(): - with pytest.raises(sp.CalledProcessError): - output = sp.check_output([SYNTHESIZER, "-h"]) + output = sp.check_output([SYNTHESIZER, "-h"]) with pytest.raises(sp.CalledProcessError): output = sp.check_output([SYNTHESIZER, "--non-existent"]) @@ -44,8 +43,6 @@ def test_minimal_cli_functionality(): more specifically we want to reimplement this line https://github.com/bids-standard/pybids/blob/b6cd0f6787230ce976a374fbd5fce650865752a3/bids/analysis/analysis.py#L282 """ - bids_dir = Path(__file__).parent / "data/ds000003" - model = "model-001_smdl.json" arg_list = " " .join([f"""--{k.lower().replace("_","-")}={v}""" for k,v in EXAMPLE_USER_ARGS.items()]) cmd = f"{SYNTHESIZER} {arg_list}" output = sp.check_output(cmd.split()) From bba5284562260afbdf3e74cdc7eed0858df2d452 Mon Sep 17 00:00:00 2001 From: shotgunosine Date: Tue, 11 May 2021 11:15:28 -0400 Subject: [PATCH 03/37] move get_events_collection_to variables.io --- bids/statsmodels_design_synthesizer.py | 56 ++----------- bids/variables/io.py | 105 +++++++++++++++---------- 2 files changed, 69 insertions(+), 92 deletions(-) diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index c93b71943..bee5c5960 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -10,6 +10,8 @@ from bids.utils import convert_JSON from bids.variables import BIDSRunVariableCollection, SparseRunVariable from bids.layout.utils import parse_file_entities +from bids.variables.io import get_events_collection +from bids.variables.entities import RunNode def statsmodels_design_synthesizer(params): @@ -47,8 +49,9 @@ def statsmodels_design_synthesizer(params): coll_df = pd.read_csv(params["events_tsv"], delimiter="\t") RunInfo = namedtuple("RunInfo", ["entities", "duration"]) - run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration) - coll = get_events_collection(coll_df, [run_info]) + #run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration) + run = RunNode(parse_file_entities(params["events_tsv"]), None, duration, params["tr"], params["nvol"]) + coll = get_events_collection(coll_df, run, output='collection') # perform transformations colls = transformations.TransformerManager().transform(coll, model_transforms) @@ -57,55 +60,6 @@ def statsmodels_design_synthesizer(params): df_out = colls.to_df(sampling_rate=sampling_rate_out) df_out.to_csv(output_tsv, index=None, sep="\t", na_rep="n/a") - -def get_events_collection(_data, run_info, drop_na=True): - """ " - This is an attempt to minimally implement: - https://github.com/bids-standard/pybids/blob/statsmodels/bids/variables/io.py - """ - colls_output = [] - if "amplitude" in _data.columns: - if ( - _data["amplitude"].astype(int) == 1 - ).all() and "trial_type" in _data.columns: - msg = ( - "Column 'amplitude' with constant value 1 " - "is unnecessary in event files; ignoring it." - ) - _data = _data.drop("amplitude", axis=1) - else: - msg = "Column name 'amplitude' is reserved; " "renaming it to 'amplitude_'." - _data = _data.rename(columns={"amplitude": "amplitude_"}) - warnings.warn(msg) - - _data = _data.replace("n/a", np.nan) # Replace BIDS' n/a - _data = _data.apply(pd.to_numeric, errors="ignore") - - _cols = list(set(_data.columns.tolist()) - {"onset", "duration"}) - - # Construct a DataFrame for each extra column - for col in _cols: - df = _data[["onset", "duration"]].copy() - df["amplitude"] = _data[col].values - - # Add in all of the run's entities as new columns for - # index - # for entity, value in entities.items(): - # if entity in ALL_ENTITIES: - # df[entity] = value - # - if drop_na: - df = df.dropna(subset=["amplitude"]) - - if df.empty: - continue - var = SparseRunVariable(name=col, data=df, run_info=run_info, source="events") - colls_output.append(var) - - output = BIDSRunVariableCollection(colls_output) - return output - - def create_parser(): """Returns argument parser""" p = argparse.ArgumentParser() diff --git a/bids/variables/io.py b/bids/variables/io.py index 6c23a0df8..84c9160a2 100644 --- a/bids/variables/io.py +++ b/bids/variables/io.py @@ -10,7 +10,7 @@ from bids.utils import listify from .entities import NodeIndex from .variables import SparseRunVariable, DenseRunVariable, SimpleVariable - +from .collections import BIDSRunVariableCollection BASE_ENTITIES = ['subject', 'session', 'task', 'run'] ALL_ENTITIES = BASE_ENTITIES + ['datatype', 'suffix', 'acquisition'] @@ -120,6 +120,68 @@ def _get_nvols(img_f): return nvols +def get_events_collection(_data, run, entities=None, drop_na=True, output='run', columns=None): + """ + This is an attempt to minimally implement: + https://github.com/bids-standard/pybids/blob/statsmodels/bids/variables/io.py + + in a way that will still work for bids io, but will also work without layout. + """ + + if output == 'collection': + colls_output = [] + elif output != 'run': + raise ValueError(f"output must be one of [run, output], {output} was passed.") + + run_info = run.get_info() + if entities is None: + entities = run_info.entities + if 'amplitude' in _data.columns: + if (_data['amplitude'].astype(int) == 1).all() and \ + 'trial_type' in _data.columns: + msg = ("Column 'amplitude' with constant value 1 " + "is unnecessary in event files; ignoring it.") + _data = _data.drop('amplitude', axis=1) + else: + msg = ("Column name 'amplitude' is reserved; " + "renaming it to 'amplitude_'.") + _data = _data.rename( + columns={'amplitude': 'amplitude_'}) + warnings.warn(msg) + + _data = _data.replace('n/a', np.nan) # Replace BIDS' n/a + _data = _data.apply(pd.to_numeric, errors='ignore') + + _cols = columns or list(set(_data.columns.tolist()) - + {'onset', 'duration'}) + + # Construct a DataFrame for each extra column + for col in _cols: + df = _data[['onset', 'duration']].copy() + df['amplitude'] = _data[col].values + + # Add in all of the run's entities as new columns for + # index + for entity, value in entities.items(): + if entity in ALL_ENTITIES: + df[entity] = value + + if drop_na: + df = df.dropna(subset=['amplitude']) + + if df.empty: + continue + + var = SparseRunVariable( + name=col, data=df, run_info=run_info, source='events') + if output == 'run': + run.add_variable(var) + else: + colls_output.append(var) + if output == 'run': + return run + else: + return BIDSRunVariableCollection(colls_output) def _load_time_variables(layout, dataset=None, columns=None, scan_length=None, drop_na=True, events=True, physio=True, stim=True, @@ -258,46 +320,7 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None, img_f, extension='.tsv', suffix='events', all_=True, full_search=True, ignore_strict_entities=['suffix', 'extension']) for _data in dfs: - _data = pd.read_csv(_data, sep='\t') - if 'amplitude' in _data.columns: - if (_data['amplitude'].astype(int) == 1).all() and \ - 'trial_type' in _data.columns: - msg = ("Column 'amplitude' with constant value 1 " - "is unnecessary in event files; ignoring it.") - _data = _data.drop('amplitude', axis=1) - else: - msg = ("Column name 'amplitude' is reserved; " - "renaming it to 'amplitude_'.") - _data = _data.rename( - columns={'amplitude': 'amplitude_'}) - warnings.warn(msg) - - _data = _data.replace('n/a', np.nan) # Replace BIDS' n/a - _data = _data.apply(pd.to_numeric, errors='ignore') - - _cols = columns or list(set(_data.columns.tolist()) - - {'onset', 'duration'}) - - # Construct a DataFrame for each extra column - for col in _cols: - df = _data[['onset', 'duration']].copy() - df['amplitude'] = _data[col].values - - # Add in all of the run's entities as new columns for - # index - for entity, value in entities.items(): - if entity in ALL_ENTITIES: - df[entity] = value - - if drop_na: - df = df.dropna(subset=['amplitude']) - - if df.empty: - continue - - var = SparseRunVariable( - name=col, data=df, run_info=run_info, source='events') - run.add_variable(var) + run = get_events_collection(_data, run, entities) # Process confound files if regressors: From 0d2a888d2f92084c163892312cd23696e5606bd6 Mon Sep 17 00:00:00 2001 From: leej3 Date: Tue, 11 May 2021 20:00:12 +0100 Subject: [PATCH 04/37] fix data path during testing --- bids/tests/test_statsmodels-design-synthesizer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py index b1e794334..1d197c02d 100644 --- a/bids/tests/test_statsmodels-design-synthesizer.py +++ b/bids/tests/test_statsmodels-design-synthesizer.py @@ -10,10 +10,11 @@ from bids import statsmodels_design_synthesizer as synth_mod # from bids_statsmodels_design_synthesizer import Path(SYNTHESIZER).stem as synth_mod +DATA_DIR = (Path(__file__).parent / "data/ds005").absolute() EXAMPLE_USER_ARGS = { "output_tsv": "aggregated_design.tsv", - "transforms": "data/ds005/models/ds-005_type-mfx_model.json", - "events_tsv": "data/ds005/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv", + "transforms": f"{DATA_DIR}/models/ds-005_type-mfx_model.json", + "events_tsv": f"{DATA_DIR}/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv", "tr": 2, "ta": 2, "nvol": 160, From 4e41306d08f47b4b95f56c13dfbe05c390884819 Mon Sep 17 00:00:00 2001 From: leej3 Date: Tue, 11 May 2021 20:13:34 +0100 Subject: [PATCH 05/37] fix event file reading --- bids/variables/io.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bids/variables/io.py b/bids/variables/io.py index 84c9160a2..5ad1e9bb5 100644 --- a/bids/variables/io.py +++ b/bids/variables/io.py @@ -320,6 +320,7 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None, img_f, extension='.tsv', suffix='events', all_=True, full_search=True, ignore_strict_entities=['suffix', 'extension']) for _data in dfs: + _data = pd.read_csv(_data, sep='\t') run = get_events_collection(_data, run, entities) # Process confound files From 14d32dfd77fe57474c2a75351afd82429ce75fe7 Mon Sep 17 00:00:00 2001 From: leej3 Date: Wed, 12 May 2021 11:15:56 +0100 Subject: [PATCH 06/37] tidy get_events_collection --- bids/variables/io.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bids/variables/io.py b/bids/variables/io.py index 5ad1e9bb5..91444c0fd 100644 --- a/bids/variables/io.py +++ b/bids/variables/io.py @@ -120,7 +120,7 @@ def _get_nvols(img_f): return nvols -def get_events_collection(_data, run, entities=None, drop_na=True, output='run', columns=None): +def get_events_collection(_data, run, drop_na=True, columns=None, entities=None, output='run'): """ This is an attempt to minimally implement: https://github.com/bids-standard/pybids/blob/statsmodels/bids/variables/io.py @@ -316,12 +316,12 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None, # Process event files if events: - dfs = layout.get_nearest( + efiles = layout.get_nearest( img_f, extension='.tsv', suffix='events', all_=True, full_search=True, ignore_strict_entities=['suffix', 'extension']) - for _data in dfs: - _data = pd.read_csv(_data, sep='\t') - run = get_events_collection(_data, run, entities) + for ef in efiles: + _data = pd.read_csv(ef, sep='\t') + run = get_events_collection(_data, run, drop_na=drop_na, columns=columns) # Process confound files if regressors: From 0fb031bf8d9b54c40c5a1c4ff9578767930d99a1 Mon Sep 17 00:00:00 2001 From: leej3 Date: Wed, 12 May 2021 11:17:51 +0100 Subject: [PATCH 07/37] move loading of regressor into function --- bids/statsmodels_design_synthesizer.py | 3 ++ bids/variables/io.py | 46 +++++++++++++++++++++----- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index bee5c5960..fbde2a433 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -25,6 +25,7 @@ def statsmodels_design_synthesizer(params): sampling_rate_out = params.get("output_sampling_rate") # Process transformations file + # TODO: abstact transforms file reading into a function. # TODO: add transforms functionality, for now only model.json is handled # TODO: some basic error checking to confirm the correct level of # transformations has been obtained. This will most likely be the case since @@ -50,6 +51,8 @@ def statsmodels_design_synthesizer(params): RunInfo = namedtuple("RunInfo", ["entities", "duration"]) #run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration) + # TODO: this will need to be implemented without RunNode to break cyclic + # dependencies if transformations is to be extracted run = RunNode(parse_file_entities(params["events_tsv"]), None, duration, params["tr"], params["nvol"]) coll = get_events_collection(coll_df, run, output='collection') diff --git a/bids/variables/io.py b/bids/variables/io.py index 91444c0fd..3d9895d9d 100644 --- a/bids/variables/io.py +++ b/bids/variables/io.py @@ -183,6 +183,41 @@ def get_events_collection(_data, run, drop_na=True, columns=None, entities=None, else: return BIDSRunVariableCollection(colls_output) + +def get_regressors_collection(_data, run, columns=None, entities=None, output='run'): + + if output == 'collection': + colls_output = [] + elif output != 'run': + raise ValueError(f"output must be one of [run, output], {output} was passed.") + + run_info = run.get_info() + if entities is None: + entities = run_info.entities + + if columns is not None: + conf_cols = list(set(_data.columns) & set(columns)) + _data = _data.loc[:, conf_cols] + for col in _data.columns: + # TODO: output sampling rate should likely be used + sr = 1. / run.repetition_time + var = DenseRunVariable(name=col, values=_data[[col]], + run_info=run_info, source='regressors', + sampling_rate=sr) + + # TODO: this logic can be simplified. Can always append to a list and + # then add to the output object. + if output == 'run': + run.add_variable(var) + else: + colls_output.append(var) + if output == 'run': + return run + else: + return BIDSRunVariableCollection(colls_output) + + + def _load_time_variables(layout, dataset=None, columns=None, scan_length=None, drop_na=True, events=True, physio=True, stim=True, regressors=True, skip_empty=True, scope='all', @@ -331,15 +366,8 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None, **sub_ents) for cf in confound_files: _data = pd.read_csv(cf.path, sep='\t', na_values='n/a') - if columns is not None: - conf_cols = list(set(_data.columns) & set(columns)) - _data = _data.loc[:, conf_cols] - for col in _data.columns: - sr = 1. / run.repetition_time - var = DenseRunVariable(name=col, values=_data[[col]], - run_info=run_info, source='regressors', - sampling_rate=sr) - run.add_variable(var) + run = get_regressors_collection(_data, run, columns=columns) + # Process recordinging files rec_types = [] From 8ba2902c2174850009003461036379541ca66a97 Mon Sep 17 00:00:00 2001 From: leej3 Date: Wed, 12 May 2021 12:37:23 +0100 Subject: [PATCH 08/37] move loading of phys and stim files into function --- bids/variables/io.py | 114 +++++++++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 42 deletions(-) diff --git a/bids/variables/io.py b/bids/variables/io.py index 3d9895d9d..7124c12fe 100644 --- a/bids/variables/io.py +++ b/bids/variables/io.py @@ -185,7 +185,7 @@ def get_events_collection(_data, run, drop_na=True, columns=None, entities=None, def get_regressors_collection(_data, run, columns=None, entities=None, output='run'): - + # TODO: is drop na functionality required? if output == 'collection': colls_output = [] elif output != 'run': @@ -194,7 +194,7 @@ def get_regressors_collection(_data, run, columns=None, entities=None, output='r run_info = run.get_info() if entities is None: entities = run_info.entities - + if columns is not None: conf_cols = list(set(_data.columns) & set(columns)) _data = _data.loc[:, conf_cols] @@ -217,6 +217,73 @@ def get_regressors_collection(_data, run, columns=None, entities=None, output='r return BIDSRunVariableCollection(colls_output) +def get_rec_collection(rec_file,run,metadata,run_info=None,columns=None,entities=None, output='run'): + + if output == 'collection': + colls_output = [] + elif output != 'run': + raise ValueError(f"output must be one of [run, output], {output} was passed.") + + data = pd.read_csv(rec_file, sep='\t') + if output == 'collection': + colls_output = [] + elif output != 'run': + raise ValueError(f"output must be one of [run, output], {output} was passed.") + + if not run_info: + run_info = run.get_info() + + freq = metadata['SamplingFrequency'] + st = metadata['StartTime'] + rf_cols = metadata['Columns'] + data.columns = rf_cols + + # Filter columns if user passed names + if columns is not None: + rf_cols = list(set(rf_cols) & set(columns)) + data = data.loc[:, rf_cols] + + n_cols = len(rf_cols) + if not n_cols: + # nothing to do + return run + + # Keep only in-scan samples + if st < 0: + start_ind = np.floor(-st * freq) + values = data.values[start_ind:, :] + else: + values = data.values + + if st > 0: + n_pad = int(freq * st) + pad = np.zeros((n_pad, n_cols)) + values = np.r_[pad, values] + + n_rows = int(run.duration * freq) + if len(values) > n_rows: + values = values[:n_rows, :] + elif len(values) < n_rows: + pad = np.zeros((n_rows - len(values), n_cols)) + values = np.r_[values, pad] + + df = pd.DataFrame(values, columns=rf_cols) + source = 'physio' if '_physio.tsv' in rec_file else 'stim' + for col in df.columns: + var = DenseRunVariable(name=col, values=df[[col]], run_info=run_info, + source=source, sampling_rate=freq) + # TODO: this logic can be simplified. Can always append to a list and + # then add to the output object. + if output == 'run': + run.add_variable(var) + else: + colls_output.append(var) + if output == 'run': + return run + else: + return BIDSRunVariableCollection(colls_output) + + def _load_time_variables(layout, dataset=None, columns=None, scan_length=None, drop_na=True, events=True, physio=True, stim=True, @@ -384,46 +451,9 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None, metadata = layout.get_metadata(rf) if not metadata: raise ValueError("No .json sidecar found for '%s'." % rf) - data = pd.read_csv(rf, sep='\t') - freq = metadata['SamplingFrequency'] - st = metadata['StartTime'] - rf_cols = metadata['Columns'] - data.columns = rf_cols - - # Filter columns if user passed names - if columns is not None: - rf_cols = list(set(rf_cols) & set(columns)) - data = data.loc[:, rf_cols] - - n_cols = len(rf_cols) - if not n_cols: - continue - - # Keep only in-scan samples - if st < 0: - start_ind = np.floor(-st * freq) - values = data.values[start_ind:, :] - else: - values = data.values - - if st > 0: - n_pad = int(freq * st) - pad = np.zeros((n_pad, n_cols)) - values = np.r_[pad, values] - - n_rows = int(run.duration * freq) - if len(values) > n_rows: - values = values[:n_rows, :] - elif len(values) < n_rows: - pad = np.zeros((n_rows - len(values), n_cols)) - values = np.r_[values, pad] - - df = pd.DataFrame(values, columns=rf_cols) - source = 'physio' if '_physio.tsv' in rf else 'stim' - for col in df.columns: - var = DenseRunVariable(name=col, values=df[[col]], run_info=run_info, - source=source, sampling_rate=freq) - run.add_variable(var) + # rec_file passed in for now because rec_type needs to be inferred + run = get_rec_collection(rf, run, metadata, run_info=run_info, columns=columns) + return dataset From 605c2c09893843557c86514c4b3dd34b648920cc Mon Sep 17 00:00:00 2001 From: leej3 Date: Wed, 12 May 2021 13:53:42 +0100 Subject: [PATCH 09/37] output sampling rate not needed for reading input --- bids/variables/io.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bids/variables/io.py b/bids/variables/io.py index 7124c12fe..f3ac516a0 100644 --- a/bids/variables/io.py +++ b/bids/variables/io.py @@ -199,7 +199,6 @@ def get_regressors_collection(_data, run, columns=None, entities=None, output='r conf_cols = list(set(_data.columns) & set(columns)) _data = _data.loc[:, conf_cols] for col in _data.columns: - # TODO: output sampling rate should likely be used sr = 1. / run.repetition_time var = DenseRunVariable(name=col, values=_data[[col]], run_info=run_info, source='regressors', From b115bc03fe32561678deaa429950fcd30d812fbe Mon Sep 17 00:00:00 2001 From: leej3 Date: Wed, 12 May 2021 14:04:51 +0100 Subject: [PATCH 10/37] move source logic out of get_rec_collection --- bids/variables/io.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/bids/variables/io.py b/bids/variables/io.py index f3ac516a0..4ce0e3cbd 100644 --- a/bids/variables/io.py +++ b/bids/variables/io.py @@ -216,14 +216,13 @@ def get_regressors_collection(_data, run, columns=None, entities=None, output='r return BIDSRunVariableCollection(colls_output) -def get_rec_collection(rec_file,run,metadata,run_info=None,columns=None,entities=None, output='run'): +def get_rec_collection(data,run,metadata,source,run_info=None,columns=None,entities=None, output='run'): if output == 'collection': colls_output = [] elif output != 'run': raise ValueError(f"output must be one of [run, output], {output} was passed.") - data = pd.read_csv(rec_file, sep='\t') if output == 'collection': colls_output = [] elif output != 'run': @@ -267,7 +266,6 @@ def get_rec_collection(rec_file,run,metadata,run_info=None,columns=None,entities values = np.r_[values, pad] df = pd.DataFrame(values, columns=rf_cols) - source = 'physio' if '_physio.tsv' in rec_file else 'stim' for col in df.columns: var = DenseRunVariable(name=col, values=df[[col]], run_info=run_info, source=source, sampling_rate=freq) @@ -451,7 +449,15 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None, if not metadata: raise ValueError("No .json sidecar found for '%s'." % rf) # rec_file passed in for now because rec_type needs to be inferred - run = get_rec_collection(rf, run, metadata, run_info=run_info, columns=columns) + source = 'physio' if '_physio.tsv' in rf else 'stim' + data = pd.read_csv(rf, sep='\t') + run = get_rec_collection( + data, + run, + metadata, + source, + run_info=run_info, + columns=columns) return dataset From 63f84a56ac65458158146e74b3a484c2b0309a6b Mon Sep 17 00:00:00 2001 From: leej3 Date: Wed, 12 May 2021 14:07:14 +0100 Subject: [PATCH 11/37] will not drop na in records or reg collections for now --- bids/variables/io.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bids/variables/io.py b/bids/variables/io.py index 4ce0e3cbd..d69af8d2a 100644 --- a/bids/variables/io.py +++ b/bids/variables/io.py @@ -185,7 +185,6 @@ def get_events_collection(_data, run, drop_na=True, columns=None, entities=None, def get_regressors_collection(_data, run, columns=None, entities=None, output='run'): - # TODO: is drop na functionality required? if output == 'collection': colls_output = [] elif output != 'run': From 1ee5de1a7dde98f4174ba7e5e13916e8bc669671 Mon Sep 17 00:00:00 2001 From: leej3 Date: Wed, 12 May 2021 14:59:39 +0100 Subject: [PATCH 12/37] use tempdir for output during test --- bids/tests/test_statsmodels-design-synthesizer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py index 1d197c02d..f33e53bdf 100644 --- a/bids/tests/test_statsmodels-design-synthesizer.py +++ b/bids/tests/test_statsmodels-design-synthesizer.py @@ -5,6 +5,7 @@ import pytest import subprocess as sp from pathlib import Path +import tempfile SYNTHESIZER = "statsmodels-design-synthesizer" from bids import statsmodels_design_synthesizer as synth_mod @@ -12,7 +13,7 @@ # from bids_statsmodels_design_synthesizer import Path(SYNTHESIZER).stem as synth_mod DATA_DIR = (Path(__file__).parent / "data/ds005").absolute() EXAMPLE_USER_ARGS = { - "output_tsv": "aggregated_design.tsv", + "output_dir": tempfile.TemporaryDirectory().name, "transforms": f"{DATA_DIR}/models/ds-005_type-mfx_model.json", "events_tsv": f"{DATA_DIR}/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv", "tr": 2, From 31776fa28af4b2fce6678e2d29537c02df24e385 Mon Sep 17 00:00:00 2001 From: leej3 Date: Wed, 12 May 2021 15:00:30 +0100 Subject: [PATCH 13/37] remove output-tsv arg and start sparse/dense saving --- bids/statsmodels_design_synthesizer.py | 32 ++++++++++++-------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index fbde2a433..744cc132a 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -17,10 +17,6 @@ def statsmodels_design_synthesizer(params): """Console script for bids statsmodels_design_synthesizer.""" - # Output: - if not params.get("output_dir"): - output_tsv = params.get("output_tsv", "aggregated_statsmodels_design.tsv") - # Sampling rate of output sampling_rate_out = params.get("output_sampling_rate") @@ -57,11 +53,19 @@ def statsmodels_design_synthesizer(params): coll = get_events_collection(coll_df, run, output='collection') # perform transformations - colls = transformations.TransformerManager().transform(coll, model_transforms) + colls, colls_pre_densifification = transformations.TransformerManager(save_pre_dense=True).transform(coll, model_transforms) + + # Save sparse vars + df_sparse = colls_pre_densifification.to_df(include_dense=False) + df_sparse.to_csv(output_dir / "transformed_events.tsv", index=None, sep="\t", na_rep="n/a") + # Save dense vars + df_dense = colls.to_df(include_sparse=False) + df_out.to_csv(output_dir / "transformed_time_series.tsv", index=None, sep="\t", na_rep="n/a") - # Save colls - df_out = colls.to_df(sampling_rate=sampling_rate_out) - df_out.to_csv(output_tsv, index=None, sep="\t", na_rep="n/a") + # Save full design_matrix + if sampling_rate_out: + df_full = colls.to_df(sampling_rate=sampling_rate_out) + df_out.to_csv(output_dir / "aggregated_design.tsv", index=None, sep="\t", na_rep="n/a") def create_parser(): """Returns argument parser""" @@ -74,18 +78,12 @@ def create_parser(): "--output-sampling-rate", required=False, type=float, - help="Output sampling rate in Hz when output is dense instead of sparse", + help="Output sampling rate in Hz when a full design matrix is desired.", ) - pout = p.add_mutually_exclusive_group() - pout.add_argument( - "--output-tsv", - nargs="?", - help="Path to TSV containing a fully constructed design matrix.", - ) - pout.add_argument( + p.add_argument( "--output-dir", - nargs="?", + required=True, help="Path to directory to write processed event files.", ) From bc39cd6ecfe203043cd286c39ecb4e55dd33267e Mon Sep 17 00:00:00 2001 From: shotgunosine Date: Wed, 12 May 2021 16:11:55 -0400 Subject: [PATCH 14/37] have tfm manager check for densification or deletion of sparse variables --- bids/modeling/transformations/base.py | 35 ++++++++++++++++--- bids/statsmodels_design_synthesizer.py | 14 ++++++-- .../test_statsmodels-design-synthesizer.py | 14 +++++++- 3 files changed, 55 insertions(+), 8 deletions(-) diff --git a/bids/modeling/transformations/base.py b/bids/modeling/transformations/base.py index cc60e6873..a4d171851 100644 --- a/bids/modeling/transformations/base.py +++ b/bids/modeling/transformations/base.py @@ -11,10 +11,9 @@ import pandas as pd from bids.utils import listify, convert_JSON -from bids.variables import SparseRunVariable +from bids.variables import SparseRunVariable, BIDSRunVariableCollection from bids.modeling import transformations as pbt - class Transformation(metaclass=ABCMeta): ### Class-level settings ### @@ -405,13 +404,13 @@ class TransformerManager(object): If None, the PyBIDS transformations module is used. """ - def __init__(self, default=None): + def __init__(self, default=None, save_pre_dense=False): self.transformations = {} if default is None: # Default to PyBIDS transformations default = pbt self.default = default - + self.save_pre_dense = save_pre_dense def _sanitize_name(self, name): """ Replace any invalid/reserved transformation names with acceptable equivalents. @@ -448,6 +447,7 @@ def transform(self, collection, transformations): transformations : list List of transformations to apply. """ + changed_vars = [] for t in transformations: t = convert_JSON(t) # make sure all keys are snake case kwargs = dict(t) @@ -456,11 +456,38 @@ def transform(self, collection, transformations): # Check registered transformations; fall back on default module func = self.transformations.get(name, None) + pre_dense = {} if func is None: if not hasattr(self.default, name): raise ValueError("No transformation '%s' found: either " "explicitly register a handler, or pass a" " default module that supports it." % name) func = getattr(self.default, name) + # check for sparse variables here and save them + matching_sparse_cols = [] + if self.save_pre_dense: + for variable in collection.match_variables(cols, return_type='variable'): + if isinstance(variable, SparseRunVariable): + matching_sparse_cols.append(variable.clone()) + func(collection, cols, **kwargs) + + # check here to see if those variables are still sparse + # if so, continue, if not, save the sparse variables prior to transformation + if len(matching_sparse_cols) > 0: + for variable in matching_sparse_cols: + name = variable.name + matching_post_tfm = collection.match_variables(name, return_type='variable') + assert len(matching_post_tfm) < 2 + if (len(matching_post_tfm) == 0) or not isinstance(matching_post_tfm[0], SparseRunVariable): + changed_vars.append(variable) + + if self.save_pre_dense: + if len(changed_vars) > 0: + changed_vars = BIDSRunVariableCollection(changed_vars) + assert np.all([isinstance(vv, SparseRunVariable) for vv in changed_vars.variables.values()]) + return collection, changed_vars + else: + return collection, None return collection + diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index 744cc132a..f1c9a9b7c 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -19,6 +19,8 @@ def statsmodels_design_synthesizer(params): # Sampling rate of output sampling_rate_out = params.get("output_sampling_rate") + output_dir = Path(params.get("output_dir", 'design_synthesizer')) + output_dir.mkdir(exist_ok=True) # Process transformations file # TODO: abstact transforms file reading into a function. @@ -56,11 +58,17 @@ def statsmodels_design_synthesizer(params): colls, colls_pre_densifification = transformations.TransformerManager(save_pre_dense=True).transform(coll, model_transforms) # Save sparse vars - df_sparse = colls_pre_densifification.to_df(include_dense=False) + try: + df_sparse = colls_pre_densifification.to_df(include_dense=False) + except AttributeError: + df_sparse = colls.to_df(include_dense=False) df_sparse.to_csv(output_dir / "transformed_events.tsv", index=None, sep="\t", na_rep="n/a") # Save dense vars - df_dense = colls.to_df(include_sparse=False) - df_out.to_csv(output_dir / "transformed_time_series.tsv", index=None, sep="\t", na_rep="n/a") + try: + df_dense = colls.to_df(include_sparse=False) + df_out.to_csv(output_dir / "transformed_time_series.tsv", index=None, sep="\t", na_rep="n/a") + except ValueError: + pass # Save full design_matrix if sampling_rate_out: diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py index f33e53bdf..e4e43ccfd 100644 --- a/bids/tests/test_statsmodels-design-synthesizer.py +++ b/bids/tests/test_statsmodels-design-synthesizer.py @@ -20,7 +20,14 @@ "ta": 2, "nvol": 160, } - +EXAMPLE_USER_ARGS_2 = { + "output_dir": tempfile.TemporaryDirectory().name, + "transforms": f"{DATA_DIR}/models/ds-005_type-test_model.json", + "events_tsv": f"{DATA_DIR}/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv", + "tr": 2, + "ta": 2, + "nvol": 160, + } def test_cli_help(): output = sp.check_output([SYNTHESIZER, "-h"]) @@ -30,6 +37,7 @@ def test_cli_help(): def test_design_aggregation_function(): synth_mod.main(EXAMPLE_USER_ARGS) + synth_mod.main(EXAMPLE_USER_ARGS_2) def test_minimal_cli_functionality(): @@ -49,3 +57,7 @@ def test_minimal_cli_functionality(): cmd = f"{SYNTHESIZER} {arg_list}" output = sp.check_output(cmd.split()) + arg_list = " " .join([f"""--{k.lower().replace("_","-")}={v}""" for k,v in EXAMPLE_USER_ARGS_2.items()]) + cmd = f"{SYNTHESIZER} {arg_list}" + output = sp.check_output(cmd.split()) + From 044386bbd1765045bb5770eabde3a6cbac3e6b3c Mon Sep 17 00:00:00 2001 From: leej3 Date: Thu, 13 May 2021 08:16:04 +0100 Subject: [PATCH 15/37] parametrize tests --- .../test_statsmodels-design-synthesizer.py | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py index e4e43ccfd..245a3f614 100644 --- a/bids/tests/test_statsmodels-design-synthesizer.py +++ b/bids/tests/test_statsmodels-design-synthesizer.py @@ -35,12 +35,24 @@ def test_cli_help(): output = sp.check_output([SYNTHESIZER, "--non-existent"]) -def test_design_aggregation_function(): +@pytest.mark.parametrize( + "test_case,user_args", + [ + ("Model type test", EXAMPLE_USER_ARGS), + ("Model type mfx", EXAMPLE_USER_ARGS_2), + ] +) +def test_design_aggregation_function(test_case,user_args): synth_mod.main(EXAMPLE_USER_ARGS) - synth_mod.main(EXAMPLE_USER_ARGS_2) - -def test_minimal_cli_functionality(): +@pytest.mark.parametrize( + "test_case,user_args", + [ + ("Model type test", EXAMPLE_USER_ARGS), + ("Model type mfx", EXAMPLE_USER_ARGS_2), + ] +) +def test_minimal_cli_functionality(test_case,user_args): """ We roughly want to implement the equivalent of the following: from bids.analysis import Analysis @@ -53,11 +65,8 @@ def test_minimal_cli_functionality(): more specifically we want to reimplement this line https://github.com/bids-standard/pybids/blob/b6cd0f6787230ce976a374fbd5fce650865752a3/bids/analysis/analysis.py#L282 """ - arg_list = " " .join([f"""--{k.lower().replace("_","-")}={v}""" for k,v in EXAMPLE_USER_ARGS.items()]) + arg_list = " " .join([f"""--{k.lower().replace("_","-")}={v}""" for k,v in user_args.items()]) cmd = f"{SYNTHESIZER} {arg_list}" output = sp.check_output(cmd.split()) - arg_list = " " .join([f"""--{k.lower().replace("_","-")}={v}""" for k,v in EXAMPLE_USER_ARGS_2.items()]) - cmd = f"{SYNTHESIZER} {arg_list}" - output = sp.check_output(cmd.split()) From ae83df93d1c6e5c4615c559731d5fa71a26f706b Mon Sep 17 00:00:00 2001 From: leej3 Date: Thu, 13 May 2021 08:25:09 +0100 Subject: [PATCH 16/37] remove stutter --- bids/statsmodels_design_synthesizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index f1c9a9b7c..79fdb14a0 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -55,11 +55,11 @@ def statsmodels_design_synthesizer(params): coll = get_events_collection(coll_df, run, output='collection') # perform transformations - colls, colls_pre_densifification = transformations.TransformerManager(save_pre_dense=True).transform(coll, model_transforms) + colls, colls_pre_densification = transformations.TransformerManager(save_pre_dense=True).transform(coll, model_transforms) # Save sparse vars try: - df_sparse = colls_pre_densifification.to_df(include_dense=False) + df_sparse = colls_pre_densification.to_df(include_dense=False) except AttributeError: df_sparse = colls.to_df(include_dense=False) df_sparse.to_csv(output_dir / "transformed_events.tsv", index=None, sep="\t", na_rep="n/a") From a8fb9237de1995ffdeab607bc1e4bd7e115a2082 Mon Sep 17 00:00:00 2001 From: leej3 Date: Thu, 13 May 2021 09:02:14 +0100 Subject: [PATCH 17/37] add test for sampling rate with associated fix --- bids/statsmodels_design_synthesizer.py | 4 ++-- bids/tests/test_statsmodels-design-synthesizer.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index 79fdb14a0..7e54aa280 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -66,14 +66,14 @@ def statsmodels_design_synthesizer(params): # Save dense vars try: df_dense = colls.to_df(include_sparse=False) - df_out.to_csv(output_dir / "transformed_time_series.tsv", index=None, sep="\t", na_rep="n/a") + df_dense.to_csv(output_dir / "transformed_time_series.tsv", index=None, sep="\t", na_rep="n/a") except ValueError: pass # Save full design_matrix if sampling_rate_out: df_full = colls.to_df(sampling_rate=sampling_rate_out) - df_out.to_csv(output_dir / "aggregated_design.tsv", index=None, sep="\t", na_rep="n/a") + df_full.to_csv(output_dir / "aggregated_design.tsv", index=None, sep="\t", na_rep="n/a") def create_parser(): """Returns argument parser""" diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py index 245a3f614..6fba5ef83 100644 --- a/bids/tests/test_statsmodels-design-synthesizer.py +++ b/bids/tests/test_statsmodels-design-synthesizer.py @@ -27,6 +27,7 @@ "tr": 2, "ta": 2, "nvol": 160, + "output_sampling_rate":10, } def test_cli_help(): From ce7a50b00004bb56af6aaa93bf62e8fedd715856 Mon Sep 17 00:00:00 2001 From: leej3 Date: Thu, 13 May 2021 09:10:29 +0100 Subject: [PATCH 18/37] move test output to the pytest temp dir --- bids/tests/test_statsmodels-design-synthesizer.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py index 6fba5ef83..b68320525 100644 --- a/bids/tests/test_statsmodels-design-synthesizer.py +++ b/bids/tests/test_statsmodels-design-synthesizer.py @@ -12,8 +12,10 @@ # from bids_statsmodels_design_synthesizer import Path(SYNTHESIZER).stem as synth_mod DATA_DIR = (Path(__file__).parent / "data/ds005").absolute() + +# Define some example user arg combinations (without output_dir which is better +# to define in the scope of the test) EXAMPLE_USER_ARGS = { - "output_dir": tempfile.TemporaryDirectory().name, "transforms": f"{DATA_DIR}/models/ds-005_type-mfx_model.json", "events_tsv": f"{DATA_DIR}/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv", "tr": 2, @@ -21,7 +23,6 @@ "nvol": 160, } EXAMPLE_USER_ARGS_2 = { - "output_dir": tempfile.TemporaryDirectory().name, "transforms": f"{DATA_DIR}/models/ds-005_type-test_model.json", "events_tsv": f"{DATA_DIR}/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv", "tr": 2, @@ -43,7 +44,8 @@ def test_cli_help(): ("Model type mfx", EXAMPLE_USER_ARGS_2), ] ) -def test_design_aggregation_function(test_case,user_args): +def test_design_aggregation_function(tmp_path,test_case,user_args): + user_args['output_dir'] = str(tmp_path) synth_mod.main(EXAMPLE_USER_ARGS) @pytest.mark.parametrize( @@ -53,7 +55,7 @@ def test_design_aggregation_function(test_case,user_args): ("Model type mfx", EXAMPLE_USER_ARGS_2), ] ) -def test_minimal_cli_functionality(test_case,user_args): +def test_minimal_cli_functionality(tmp_path,test_case,user_args): """ We roughly want to implement the equivalent of the following: from bids.analysis import Analysis @@ -66,6 +68,7 @@ def test_minimal_cli_functionality(test_case,user_args): more specifically we want to reimplement this line https://github.com/bids-standard/pybids/blob/b6cd0f6787230ce976a374fbd5fce650865752a3/bids/analysis/analysis.py#L282 """ + user_args['output_dir'] = str(tmp_path) arg_list = " " .join([f"""--{k.lower().replace("_","-")}={v}""" for k,v in user_args.items()]) cmd = f"{SYNTHESIZER} {arg_list}" output = sp.check_output(cmd.split()) From 810f29ee8800482f61080e44fa2a036bd719c4b4 Mon Sep 17 00:00:00 2001 From: leej3 Date: Thu, 13 May 2021 09:43:31 +0100 Subject: [PATCH 19/37] oops --- bids/tests/test_statsmodels-design-synthesizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py index b68320525..e7dd6b49f 100644 --- a/bids/tests/test_statsmodels-design-synthesizer.py +++ b/bids/tests/test_statsmodels-design-synthesizer.py @@ -46,7 +46,7 @@ def test_cli_help(): ) def test_design_aggregation_function(tmp_path,test_case,user_args): user_args['output_dir'] = str(tmp_path) - synth_mod.main(EXAMPLE_USER_ARGS) + synth_mod.main(user_args) @pytest.mark.parametrize( "test_case,user_args", From 76c0c5476152a77229c951cc0aede1500ca2dfd0 Mon Sep 17 00:00:00 2001 From: leej3 Date: Thu, 13 May 2021 09:43:56 +0100 Subject: [PATCH 20/37] consider the sparse variables --- bids/statsmodels_design_synthesizer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index 7e54aa280..8eaa03ba3 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -58,6 +58,10 @@ def statsmodels_design_synthesizer(params): colls, colls_pre_densification = transformations.TransformerManager(save_pre_dense=True).transform(coll, model_transforms) # Save sparse vars + # TODO: consider cases where dense/sparse changes from transformation but + # sparse vars need to be combined between pre_densification and post + # transformation + # i.e. list(colls.variables.keys()) != [x.name for x in colls.get_sparse_variables()] try: df_sparse = colls_pre_densification.to_df(include_dense=False) except AttributeError: From a2fba9276c0c6d3150544a78edb78300b8b94cda Mon Sep 17 00:00:00 2001 From: leej3 Date: Thu, 13 May 2021 14:00:54 +0100 Subject: [PATCH 21/37] correct indentation bug transformations were previously not applied unless they were defined in the default transformations --- bids/modeling/transformations/base.py | 38 +++++++++++++-------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/bids/modeling/transformations/base.py b/bids/modeling/transformations/base.py index a4d171851..024f13801 100644 --- a/bids/modeling/transformations/base.py +++ b/bids/modeling/transformations/base.py @@ -456,31 +456,31 @@ def transform(self, collection, transformations): # Check registered transformations; fall back on default module func = self.transformations.get(name, None) - pre_dense = {} if func is None: if not hasattr(self.default, name): raise ValueError("No transformation '%s' found: either " "explicitly register a handler, or pass a" " default module that supports it." % name) func = getattr(self.default, name) - # check for sparse variables here and save them - matching_sparse_cols = [] - if self.save_pre_dense: - for variable in collection.match_variables(cols, return_type='variable'): - if isinstance(variable, SparseRunVariable): - matching_sparse_cols.append(variable.clone()) - - func(collection, cols, **kwargs) - - # check here to see if those variables are still sparse - # if so, continue, if not, save the sparse variables prior to transformation - if len(matching_sparse_cols) > 0: - for variable in matching_sparse_cols: - name = variable.name - matching_post_tfm = collection.match_variables(name, return_type='variable') - assert len(matching_post_tfm) < 2 - if (len(matching_post_tfm) == 0) or not isinstance(matching_post_tfm[0], SparseRunVariable): - changed_vars.append(variable) + + # check for sparse variables here and save them + matching_sparse_cols = [] + if self.save_pre_dense: + for variable in collection.match_variables(cols, return_type='variable'): + if isinstance(variable, SparseRunVariable): + matching_sparse_cols.append(variable.clone()) + + func(collection, cols, **kwargs) + + # check here to see if those variables are still sparse + # if so, continue, if not, save the sparse variables prior to transformation + if len(matching_sparse_cols) > 0: + for variable in matching_sparse_cols: + name = variable.name + matching_post_tfm = collection.match_variables(name, return_type='variable') + assert len(matching_post_tfm) < 2 + if (len(matching_post_tfm) == 0) or not isinstance(matching_post_tfm[0], SparseRunVariable): + changed_vars.append(variable) if self.save_pre_dense: if len(changed_vars) > 0: From 4a6dac085d8405035dd4152a3b17e3a295901f83 Mon Sep 17 00:00:00 2001 From: leej3 Date: Thu, 13 May 2021 14:12:03 +0100 Subject: [PATCH 22/37] update TODOs --- bids/statsmodels_design_synthesizer.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index 8eaa03ba3..9a8919def 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -54,14 +54,11 @@ def statsmodels_design_synthesizer(params): run = RunNode(parse_file_entities(params["events_tsv"]), None, duration, params["tr"], params["nvol"]) coll = get_events_collection(coll_df, run, output='collection') - # perform transformations + # perform transformations, additionally save variables that were changed + # TODO: need to consider sparse to sparse colls, colls_pre_densification = transformations.TransformerManager(save_pre_dense=True).transform(coll, model_transforms) # Save sparse vars - # TODO: consider cases where dense/sparse changes from transformation but - # sparse vars need to be combined between pre_densification and post - # transformation - # i.e. list(colls.variables.keys()) != [x.name for x in colls.get_sparse_variables()] try: df_sparse = colls_pre_densification.to_df(include_dense=False) except AttributeError: From 1094c2f02c5458360fe3f81ed9c32377a5cc3590 Mon Sep 17 00:00:00 2001 From: shotgunosine Date: Thu, 13 May 2021 14:54:01 +0100 Subject: [PATCH 23/37] fix sparse var saving --- bids/modeling/transformations/base.py | 4 ++++ bids/statsmodels_design_synthesizer.py | 31 +++++++++++++++++++++----- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/bids/modeling/transformations/base.py b/bids/modeling/transformations/base.py index 024f13801..31ccc4ff2 100644 --- a/bids/modeling/transformations/base.py +++ b/bids/modeling/transformations/base.py @@ -464,6 +464,10 @@ def transform(self, collection, transformations): func = getattr(self.default, name) # check for sparse variables here and save them + # We want everything sparse (the last time it was sparse during the + # transformation process) and everything that is dense at the end + # of the transformations. This will allow downstream users to add + # convolutions etc. as they please. matching_sparse_cols = [] if self.save_pre_dense: for variable in collection.match_variables(cols, return_type='variable'): diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index 9a8919def..3b67b317e 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -54,15 +54,34 @@ def statsmodels_design_synthesizer(params): run = RunNode(parse_file_entities(params["events_tsv"]), None, duration, params["tr"], params["nvol"]) coll = get_events_collection(coll_df, run, output='collection') - # perform transformations, additionally save variables that were changed - # TODO: need to consider sparse to sparse - colls, colls_pre_densification = transformations.TransformerManager(save_pre_dense=True).transform(coll, model_transforms) + # perform transformations, additionally save variables that were changed. + # If a column is transformed but not densified it will not be in + # colls_pre_densification. + colls, colls_pre_densification = ( + transformations.TransformerManager(save_pre_dense=True) + .transform(coll, model_transforms) + ) # Save sparse vars - try: - df_sparse = colls_pre_densification.to_df(include_dense=False) - except AttributeError: + if colls_pre_densification is not None: + final_sparse_names = set([vv.name for vv in colls.variables]) + pre_dense_names = set([vv.name for vv in colls_pre_densifification]) + shared_names = final_sparse_names.intersection(pre_dense_names) + if len(shared_names) > 0: + raise ValueError( + f"""Somehow you've ended up with a copy of {shared_names} in both the final + transformed variables and in the pre-densification variables. Did you delete a + variable and recreate one with same name?""" + ) + output = merge_collections( + [colls_pre_densification, BidsRunVariableCollection(colls.get_sparse_variables())] + ) + assert output.all_sparse() + + df_sparse = output.to_df() + else: df_sparse = colls.to_df(include_dense=False) + df_sparse.to_csv(output_dir / "transformed_events.tsv", index=None, sep="\t", na_rep="n/a") # Save dense vars try: From e1a977a5ef6d627b32f37a7d7ca0c2f5f8fef4af Mon Sep 17 00:00:00 2001 From: leej3 Date: Thu, 13 May 2021 17:51:43 +0100 Subject: [PATCH 24/37] more fixes for sparse/dense --- bids/statsmodels_design_synthesizer.py | 8 ++++---- bids/tests/test_statsmodels-design-synthesizer.py | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index 3b67b317e..6a8739dce 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -8,7 +8,7 @@ from collections import namedtuple from bids.modeling import transformations from bids.utils import convert_JSON -from bids.variables import BIDSRunVariableCollection, SparseRunVariable +from bids.variables import BIDSRunVariableCollection, SparseRunVariable, merge_collections from bids.layout.utils import parse_file_entities from bids.variables.io import get_events_collection from bids.variables.entities import RunNode @@ -64,8 +64,8 @@ def statsmodels_design_synthesizer(params): # Save sparse vars if colls_pre_densification is not None: - final_sparse_names = set([vv.name for vv in colls.variables]) - pre_dense_names = set([vv.name for vv in colls_pre_densifification]) + final_sparse_names = set([vv for vv in colls.variables]) + pre_dense_names = set([vv for vv in colls_pre_densification.variables]) shared_names = final_sparse_names.intersection(pre_dense_names) if len(shared_names) > 0: raise ValueError( @@ -74,7 +74,7 @@ def statsmodels_design_synthesizer(params): variable and recreate one with same name?""" ) output = merge_collections( - [colls_pre_densification, BidsRunVariableCollection(colls.get_sparse_variables())] + [colls_pre_densification, BIDSRunVariableCollection(colls.get_sparse_variables())] ) assert output.all_sparse() diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py index e7dd6b49f..b46318688 100644 --- a/bids/tests/test_statsmodels-design-synthesizer.py +++ b/bids/tests/test_statsmodels-design-synthesizer.py @@ -30,6 +30,9 @@ "nvol": 160, "output_sampling_rate":10, } +EXAMPLE_USER_ARGS_3 = EXAMPLE_USER_ARGS_2.copy() +EXAMPLE_USER_ARGS_3["transforms"] = f"{DATA_DIR}/models/ds-005_type-convolution_model.json" + def test_cli_help(): output = sp.check_output([SYNTHESIZER, "-h"]) @@ -42,6 +45,7 @@ def test_cli_help(): [ ("Model type test", EXAMPLE_USER_ARGS), ("Model type mfx", EXAMPLE_USER_ARGS_2), + ("Model type convolution", EXAMPLE_USER_ARGS_3), ] ) def test_design_aggregation_function(tmp_path,test_case,user_args): From 1889e41abe888cf25fa708784e5e635cdd6f2e89 Mon Sep 17 00:00:00 2001 From: leej3 Date: Thu, 13 May 2021 17:56:54 +0100 Subject: [PATCH 25/37] add model with convolution --- .../models/ds-005_type-convolution_model.json | 109 ++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 bids/tests/data/ds005/models/ds-005_type-convolution_model.json diff --git a/bids/tests/data/ds005/models/ds-005_type-convolution_model.json b/bids/tests/data/ds005/models/ds-005_type-convolution_model.json new file mode 100644 index 000000000..ff1e77bcb --- /dev/null +++ b/bids/tests/data/ds005/models/ds-005_type-convolution_model.json @@ -0,0 +1,109 @@ +{ + "Name": "test_model", + "Description": "simple test model", + "Nodes": [ + { + "Name": "run", + "Level": "Run", + "GroupBy": [ + "run", + "subject" + ], + "Model": { + "X": [ + "RT", + "gain" + ], + "Formula": "0 + RT * gain" + }, + "Transformations": [ + { + "Name": "Factor", + "Input": "trial_type" + }, + { + "Name": "Rename", + "Input": "trial_type.parametric gain", + "Output": "gain" + }, + { + "Name": "Threshold", + "Input": "respcat", + "Output": "pos_respcat", + "Binarize": true + }, + { + "Name": "Scale", + "Input": "RT" + }, + { + "Name": "Convolve", + "Input": ["gain", "pos_respcat"], + "Model": "spm" + } + ], + "DummyContrasts": { + "Type": "t" + } + }, + { + "Name": "participant", + "Level": "Subject", + "Model": { + "X": [ + "@intercept" + ] + }, + "DummyContrasts": { + "Type": "FEMA" + } + }, + { + "Name": "by-group", + "Level": "Dataset", + "Model": { + "X": [ + "@intercept" + ] + }, + "DummyContrasts": { + "Type": "t" + } + }, + { + "Name": "group-diff", + "Level": "Dataset", + "Model": { + "X": [ + "@intercept", + "sex" + ] + }, + "DummyContrasts": { + "Type": "t" + } + } + ], + "Edges": [ + { + "Source": "run", + "Destination": "participant", + "GroupBy": [ + "subject", + "contrast" + ] + }, + { + "Source": "participant", + "Destination": "by-group", + "GroupBy": [ + "sex" + ] + }, + { + "Source": "participant", + "Destination": "group-diff", + "GroupBy": [] + } + ] +} From a1764f198a6ce4bcecbe4853c689d9db1a631491 Mon Sep 17 00:00:00 2001 From: shotgunosine Date: Thu, 13 May 2021 12:57:47 -0400 Subject: [PATCH 26/37] Fix sparse variable filtering --- bids/statsmodels_design_synthesizer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index 6a8739dce..904d4bd3c 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -64,7 +64,8 @@ def statsmodels_design_synthesizer(params): # Save sparse vars if colls_pre_densification is not None: - final_sparse_names = set([vv for vv in colls.variables]) + final_sparse_colls = BIDSRunVariableCollection(colls.get_sparse_variables()) + final_sparse_names = set([vv for vv in final_sparse_colls.variables]) pre_dense_names = set([vv for vv in colls_pre_densification.variables]) shared_names = final_sparse_names.intersection(pre_dense_names) if len(shared_names) > 0: @@ -74,7 +75,7 @@ def statsmodels_design_synthesizer(params): variable and recreate one with same name?""" ) output = merge_collections( - [colls_pre_densification, BIDSRunVariableCollection(colls.get_sparse_variables())] + [colls_pre_densification, final_sparse_colls] ) assert output.all_sparse() From 53696641ecf69cc6d5eec3d1381e4e34c4d806ba Mon Sep 17 00:00:00 2001 From: shotgunosine Date: Thu, 13 May 2021 14:25:58 -0400 Subject: [PATCH 27/37] fix check columns in output dataframes --- bids/statsmodels_design_synthesizer.py | 1 + bids/tests/test_statsmodels-design-synthesizer.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index 904d4bd3c..eb25320bf 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -68,6 +68,7 @@ def statsmodels_design_synthesizer(params): final_sparse_names = set([vv for vv in final_sparse_colls.variables]) pre_dense_names = set([vv for vv in colls_pre_densification.variables]) shared_names = final_sparse_names.intersection(pre_dense_names) + if len(shared_names) > 0: raise ValueError( f"""Somehow you've ended up with a copy of {shared_names} in both the final diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py index b46318688..a930dd203 100644 --- a/bids/tests/test_statsmodels-design-synthesizer.py +++ b/bids/tests/test_statsmodels-design-synthesizer.py @@ -6,6 +6,7 @@ import subprocess as sp from pathlib import Path import tempfile +import pandas as pd SYNTHESIZER = "statsmodels-design-synthesizer" from bids import statsmodels_design_synthesizer as synth_mod @@ -45,13 +46,23 @@ def test_cli_help(): [ ("Model type test", EXAMPLE_USER_ARGS), ("Model type mfx", EXAMPLE_USER_ARGS_2), - ("Model type convolution", EXAMPLE_USER_ARGS_3), ] ) def test_design_aggregation_function(tmp_path,test_case,user_args): user_args['output_dir'] = str(tmp_path) synth_mod.main(user_args) +def test_design_aggregation_function_with_convolution(tmp_path): + EXAMPLE_USER_ARGS_3['output_dir'] = str(tmp_path) + synth_mod.main(EXAMPLE_USER_ARGS_3) + sparse_output = pd.read_csv(tmp_path/"transformed_events.tsv", sep='\t') + assert 'pos_respcat' in sparse_output.columns + assert 'gain' in sparse_output.columns + + dense_output = pd.read_csv(tmp_path/"transformed_time_series.tsv", sep='\t') + assert 'pos_respcat' in dense_output.columns + assert 'gain' in dense_output.columns + @pytest.mark.parametrize( "test_case,user_args", [ From 34a209fa3f9c0dbaf3486a5d542394d084303886 Mon Sep 17 00:00:00 2001 From: leej3 Date: Thu, 13 May 2021 20:12:01 +0100 Subject: [PATCH 28/37] use click for cli --- bids/statsmodels_design_synthesizer.py | 103 +++++++++--------- .../test_statsmodels-design-synthesizer.py | 9 +- 2 files changed, 54 insertions(+), 58 deletions(-) diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index eb25320bf..bf138d34d 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -12,14 +12,52 @@ from bids.layout.utils import parse_file_entities from bids.variables.io import get_events_collection from bids.variables.entities import RunNode +import click +from . import __version__ -def statsmodels_design_synthesizer(params): - """Console script for bids statsmodels_design_synthesizer.""" - # Sampling rate of output - sampling_rate_out = params.get("output_sampling_rate") - output_dir = Path(params.get("output_dir", 'design_synthesizer')) +@click.command() +@click.version_option(__version__, prog_name='statsmodels_design_sythesizer') +@click.option( + "--events-tsv", required=True, help="Path to events TSV") +@click.option( + "--transforms", required=True, help="Path to transform or model json" + ) +@click.option( + "--nvol", required=True, type=int, help="Number of volumes in func time-series" + ) +@click.option( + "--tr", required=True, type=float, help="TR for func time series" + ) +@click.option( + "--ta", required=True, type=float, help="TA for events") +@click.option( + "--output-sampling-rate", + required=False, + type=float, + help="Output sampling rate in Hz when a full design matrix is desired.", + ) +@click.option( + "--output-dir", + required=False, + help="Path to directory to write processed event files.", + ) +def main(**kwargs): + statsmodels_design_synthesizer(**kwargs) + +def statsmodels_design_synthesizer( + *, + events_tsv, + transforms, + nvol, + tr, + ta, + output_sampling_rate=None, + output_dir=None, + ): + + output_dir = Path(output_dir or "design_synthesizer") output_dir.mkdir(exist_ok=True) # Process transformations file @@ -29,7 +67,7 @@ def statsmodels_design_synthesizer(params): # transformations has been obtained. This will most likely be the case since # transformations at higher levels will no longer be required when the new # "flow" approach is used. - transforms_file = Path(params["transforms"]) + transforms_file = Path(transforms) if not transforms_file.exists(): raise ValueError(f"Cannot find {transforms_file}") model = convert_JSON(json.loads(transforms_file.read_text())) @@ -42,16 +80,16 @@ def statsmodels_design_synthesizer(params): raise ValueError("Cannot find a key for nodes in the model file") model_transforms = model[nodes_key][0]["transformations"] - duration = params["nvol"] * params["tr"] + duration = nvol * tr # Get relevant collection - coll_df = pd.read_csv(params["events_tsv"], delimiter="\t") + coll_df = pd.read_csv(events_tsv, delimiter="\t") RunInfo = namedtuple("RunInfo", ["entities", "duration"]) #run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration) # TODO: this will need to be implemented without RunNode to break cyclic # dependencies if transformations is to be extracted - run = RunNode(parse_file_entities(params["events_tsv"]), None, duration, params["tr"], params["nvol"]) + run = RunNode(parse_file_entities(events_tsv), None, duration, tr, nvol) coll = get_events_collection(coll_df, run, output='collection') # perform transformations, additionally save variables that were changed. @@ -93,53 +131,10 @@ def statsmodels_design_synthesizer(params): pass # Save full design_matrix - if sampling_rate_out: - df_full = colls.to_df(sampling_rate=sampling_rate_out) + if output_sampling_rate: + df_full = colls.to_df(sampling_rate=output_sampling_rate) df_full.to_csv(output_dir / "aggregated_design.tsv", index=None, sep="\t", na_rep="n/a") -def create_parser(): - """Returns argument parser""" - p = argparse.ArgumentParser() - p.add_argument("--events-tsv", required=True, help="Path to events TSV") - p.add_argument( - "--transforms", required=True, help="Path to transform or model json" - ) - p.add_argument( - "--output-sampling-rate", - required=False, - type=float, - help="Output sampling rate in Hz when a full design matrix is desired.", - ) - - p.add_argument( - "--output-dir", - required=True, - help="Path to directory to write processed event files.", - ) - - ptimes = p.add_argument_group( - "Specify some essential details about the time series." - ) - ptimes.add_argument( - "--nvol", required=True, type=int, help="Number of volumes in func time-series" - ) - ptimes.add_argument( - "--tr", required=True, type=float, help="TR for func time series" - ) - ptimes.add_argument("--ta", required=True, type=float, help="TA for events") - - return p - - -def main(user_args=None): - parser = create_parser() - if user_args is None: - namespace = parser.parse_args(sys.argv[1:]) - params = vars(namespace) - else: - params = user_args - - statsmodels_design_synthesizer(params) if __name__ == "__main__": diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py index a930dd203..6ba1cd7e6 100644 --- a/bids/tests/test_statsmodels-design-synthesizer.py +++ b/bids/tests/test_statsmodels-design-synthesizer.py @@ -17,8 +17,8 @@ # Define some example user arg combinations (without output_dir which is better # to define in the scope of the test) EXAMPLE_USER_ARGS = { - "transforms": f"{DATA_DIR}/models/ds-005_type-mfx_model.json", "events_tsv": f"{DATA_DIR}/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv", + "transforms": f"{DATA_DIR}/models/ds-005_type-mfx_model.json", "tr": 2, "ta": 2, "nvol": 160, @@ -36,7 +36,7 @@ def test_cli_help(): - output = sp.check_output([SYNTHESIZER, "-h"]) + output = sp.check_output([SYNTHESIZER, "--help"]) with pytest.raises(sp.CalledProcessError): output = sp.check_output([SYNTHESIZER, "--non-existent"]) @@ -50,11 +50,12 @@ def test_cli_help(): ) def test_design_aggregation_function(tmp_path,test_case,user_args): user_args['output_dir'] = str(tmp_path) - synth_mod.main(user_args) + main_func = getattr(synth_mod, SYNTHESIZER.replace("-","_")) + main_func(**user_args) def test_design_aggregation_function_with_convolution(tmp_path): EXAMPLE_USER_ARGS_3['output_dir'] = str(tmp_path) - synth_mod.main(EXAMPLE_USER_ARGS_3) + synth_mod.statsmodels_design_synthesizer(**EXAMPLE_USER_ARGS_3) sparse_output = pd.read_csv(tmp_path/"transformed_events.tsv", sep='\t') assert 'pos_respcat' in sparse_output.columns assert 'gain' in sparse_output.columns From 3c57020b4b2110ffb012907345160589ccdc52b4 Mon Sep 17 00:00:00 2001 From: shotgunosine Date: Thu, 13 May 2021 16:15:04 -0400 Subject: [PATCH 29/37] enh don't rely on run node for get events collection --- bids/statsmodels_design_synthesizer.py | 8 ++++---- bids/variables/io.py | 26 ++++++++------------------ 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index bf138d34d..22ae3c47a 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -83,14 +83,14 @@ def statsmodels_design_synthesizer( duration = nvol * tr # Get relevant collection - coll_df = pd.read_csv(events_tsv, delimiter="\t") - RunInfo = namedtuple("RunInfo", ["entities", "duration"]) + coll_df = pd.read_csv(params["events_tsv"], delimiter="\t") + RunInfo = namedtuple('RunInfo', ['entities', 'duration', 'tr', 'image', 'n_vols']) #run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration) # TODO: this will need to be implemented without RunNode to break cyclic # dependencies if transformations is to be extracted - run = RunNode(parse_file_entities(events_tsv), None, duration, tr, nvol) - coll = get_events_collection(coll_df, run, output='collection') + run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration, params["tr"], None, params["nvol"]) + coll = BIDSRunVariableCollection(get_events_collection(coll_df, run_info)) # perform transformations, additionally save variables that were changed. # If a column is transformed but not densified it will not be in diff --git a/bids/variables/io.py b/bids/variables/io.py index d69af8d2a..14d6b821c 100644 --- a/bids/variables/io.py +++ b/bids/variables/io.py @@ -120,7 +120,7 @@ def _get_nvols(img_f): return nvols -def get_events_collection(_data, run, drop_na=True, columns=None, entities=None, output='run'): +def get_events_collection(_data, run_info, drop_na=True, columns=None, entities=None, output='run'): """ This is an attempt to minimally implement: https://github.com/bids-standard/pybids/blob/statsmodels/bids/variables/io.py @@ -128,12 +128,7 @@ def get_events_collection(_data, run, drop_na=True, columns=None, entities=None, in a way that will still work for bids io, but will also work without layout. """ - if output == 'collection': - colls_output = [] - elif output != 'run': - raise ValueError(f"output must be one of [run, output], {output} was passed.") - - run_info = run.get_info() + run_info if entities is None: entities = run_info.entities if 'amplitude' in _data.columns: @@ -154,7 +149,7 @@ def get_events_collection(_data, run, drop_na=True, columns=None, entities=None, _cols = columns or list(set(_data.columns.tolist()) - {'onset', 'duration'}) - + colls_output = [] # Construct a DataFrame for each extra column for col in _cols: df = _data[['onset', 'duration']].copy() @@ -174,15 +169,8 @@ def get_events_collection(_data, run, drop_na=True, columns=None, entities=None, var = SparseRunVariable( name=col, data=df, run_info=run_info, source='events') - if output == 'run': - run.add_variable(var) - else: - colls_output.append(var) - if output == 'run': - return run - else: - return BIDSRunVariableCollection(colls_output) - + colls_output.append(var) + return colls_output def get_regressors_collection(_data, run, columns=None, entities=None, output='run'): if output == 'collection': @@ -419,7 +407,9 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None, full_search=True, ignore_strict_entities=['suffix', 'extension']) for ef in efiles: _data = pd.read_csv(ef, sep='\t') - run = get_events_collection(_data, run, drop_na=drop_na, columns=columns) + event_cols = get_events_collection(_data, run.get_info(), drop_na=drop_na, columns=columns) + for ec in event_cols: + run.add_variable(ec) # Process confound files if regressors: From 979ec1051c1c8674d9482ea8fafb5d74c51acedb Mon Sep 17 00:00:00 2001 From: shotgunosine Date: Thu, 13 May 2021 16:37:02 -0400 Subject: [PATCH 30/37] enh remove run node from rec and reg loading --- bids/variables/io.py | 71 +++++++++++++------------------------------- 1 file changed, 20 insertions(+), 51 deletions(-) diff --git a/bids/variables/io.py b/bids/variables/io.py index 14d6b821c..4886e9d07 100644 --- a/bids/variables/io.py +++ b/bids/variables/io.py @@ -120,7 +120,7 @@ def _get_nvols(img_f): return nvols -def get_events_collection(_data, run_info, drop_na=True, columns=None, entities=None, output='run'): +def get_events_collection(_data, run_info, drop_na=True, columns=None, entities=None): """ This is an attempt to minimally implement: https://github.com/bids-standard/pybids/blob/statsmodels/bids/variables/io.py @@ -172,13 +172,10 @@ def get_events_collection(_data, run_info, drop_na=True, columns=None, entities= colls_output.append(var) return colls_output -def get_regressors_collection(_data, run, columns=None, entities=None, output='run'): - if output == 'collection': - colls_output = [] - elif output != 'run': - raise ValueError(f"output must be one of [run, output], {output} was passed.") - run_info = run.get_info() +def get_regressors_collection(_data, run_info, columns=None, entities=None): + + colls_output = [] if entities is None: entities = run_info.entities @@ -186,38 +183,17 @@ def get_regressors_collection(_data, run, columns=None, entities=None, output='r conf_cols = list(set(_data.columns) & set(columns)) _data = _data.loc[:, conf_cols] for col in _data.columns: - sr = 1. / run.repetition_time + sr = 1. / run_info.tr var = DenseRunVariable(name=col, values=_data[[col]], run_info=run_info, source='regressors', sampling_rate=sr) + colls_output.append(var) + return colls_output - # TODO: this logic can be simplified. Can always append to a list and - # then add to the output object. - if output == 'run': - run.add_variable(var) - else: - colls_output.append(var) - if output == 'run': - return run - else: - return BIDSRunVariableCollection(colls_output) - - -def get_rec_collection(data,run,metadata,source,run_info=None,columns=None,entities=None, output='run'): - - if output == 'collection': - colls_output = [] - elif output != 'run': - raise ValueError(f"output must be one of [run, output], {output} was passed.") - - if output == 'collection': - colls_output = [] - elif output != 'run': - raise ValueError(f"output must be one of [run, output], {output} was passed.") - if not run_info: - run_info = run.get_info() +def get_rec_collection(data,run_info,metadata,source,columns=None,entities=None): + colls_output = [] freq = metadata['SamplingFrequency'] st = metadata['StartTime'] rf_cols = metadata['Columns'] @@ -231,7 +207,7 @@ def get_rec_collection(data,run,metadata,source,run_info=None,columns=None,entit n_cols = len(rf_cols) if not n_cols: # nothing to do - return run + return [] # Keep only in-scan samples if st < 0: @@ -245,7 +221,7 @@ def get_rec_collection(data,run,metadata,source,run_info=None,columns=None,entit pad = np.zeros((n_pad, n_cols)) values = np.r_[pad, values] - n_rows = int(run.duration * freq) + n_rows = int(run_info.duration * freq) if len(values) > n_rows: values = values[:n_rows, :] elif len(values) < n_rows: @@ -256,17 +232,8 @@ def get_rec_collection(data,run,metadata,source,run_info=None,columns=None,entit for col in df.columns: var = DenseRunVariable(name=col, values=df[[col]], run_info=run_info, source=source, sampling_rate=freq) - # TODO: this logic can be simplified. Can always append to a list and - # then add to the output object. - if output == 'run': - run.add_variable(var) - else: - colls_output.append(var) - if output == 'run': - return run - else: - return BIDSRunVariableCollection(colls_output) - + colls_output.append(var) + return colls_output def _load_time_variables(layout, dataset=None, columns=None, scan_length=None, @@ -419,8 +386,9 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None, **sub_ents) for cf in confound_files: _data = pd.read_csv(cf.path, sep='\t', na_values='n/a') - run = get_regressors_collection(_data, run, columns=columns) - + reg_colls = get_regressors_collection(_data, run.get_info(), columns=columns) + for rc in reg_colls: + run.add_variable(rc) # Process recordinging files rec_types = [] @@ -440,13 +408,14 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None, # rec_file passed in for now because rec_type needs to be inferred source = 'physio' if '_physio.tsv' in rf else 'stim' data = pd.read_csv(rf, sep='\t') - run = get_rec_collection( + rec_colls = get_rec_collection( data, - run, + run.get_info(), metadata, source, - run_info=run_info, columns=columns) + for rc in rec_colls: + run.add_variable(rc) return dataset From 69c3720569621f25f998cf8d851439dbfcb0f840 Mon Sep 17 00:00:00 2001 From: leej3 Date: Fri, 14 May 2021 10:21:19 +0100 Subject: [PATCH 31/37] remove params, kwargs no longer captured in params --- bids/statsmodels_design_synthesizer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index 22ae3c47a..f89d69f01 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -83,13 +83,13 @@ def statsmodels_design_synthesizer( duration = nvol * tr # Get relevant collection - coll_df = pd.read_csv(params["events_tsv"], delimiter="\t") + coll_df = pd.read_csv(events_tsv, delimiter="\t") RunInfo = namedtuple('RunInfo', ['entities', 'duration', 'tr', 'image', 'n_vols']) - #run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration) + #run_info = RunInfo(parse_file_entities(events_tsv), duration) # TODO: this will need to be implemented without RunNode to break cyclic # dependencies if transformations is to be extracted - run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration, params["tr"], None, params["nvol"]) + run_info = RunInfo(parse_file_entities(events_tsv), duration, tr, None, nvol) coll = BIDSRunVariableCollection(get_events_collection(coll_df, run_info)) # perform transformations, additionally save variables that were changed. From 02cd6fc9efd0828a79eed2bcecca75c121b4a242 Mon Sep 17 00:00:00 2001 From: leej3 Date: Fri, 14 May 2021 10:19:39 +0100 Subject: [PATCH 32/37] add transforms reading function --- bids/statsmodels_design_synthesizer.py | 25 ++------------- bids/variables/io.py | 42 +++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 23 deletions(-) diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py index f89d69f01..af6dffe12 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/statsmodels_design_synthesizer.py @@ -10,7 +10,7 @@ from bids.utils import convert_JSON from bids.variables import BIDSRunVariableCollection, SparseRunVariable, merge_collections from bids.layout.utils import parse_file_entities -from bids.variables.io import get_events_collection +from bids.variables.io import get_events_collection, parse_transforms from bids.variables.entities import RunNode import click @@ -46,6 +46,7 @@ def main(**kwargs): statsmodels_design_synthesizer(**kwargs) + def statsmodels_design_synthesizer( *, events_tsv, @@ -59,27 +60,7 @@ def statsmodels_design_synthesizer( output_dir = Path(output_dir or "design_synthesizer") output_dir.mkdir(exist_ok=True) - - # Process transformations file - # TODO: abstact transforms file reading into a function. - # TODO: add transforms functionality, for now only model.json is handled - # TODO: some basic error checking to confirm the correct level of - # transformations has been obtained. This will most likely be the case since - # transformations at higher levels will no longer be required when the new - # "flow" approach is used. - transforms_file = Path(transforms) - if not transforms_file.exists(): - raise ValueError(f"Cannot find {transforms_file}") - model = convert_JSON(json.loads(transforms_file.read_text())) - - if "nodes" in model: - nodes_key = "nodes" - elif "steps" in model: - nodes_key = "steps" - else: - raise ValueError("Cannot find a key for nodes in the model file") - model_transforms = model[nodes_key][0]["transformations"] - + model_transforms = parse_transforms(transforms) duration = nvol * tr # Get relevant collection diff --git a/bids/variables/io.py b/bids/variables/io.py index 4886e9d07..0a382d8d0 100644 --- a/bids/variables/io.py +++ b/bids/variables/io.py @@ -1,13 +1,14 @@ """ Tools for reading/writing BIDS data files. """ from os.path import join +from pathlib import Path import warnings import json import numpy as np import pandas as pd -from bids.utils import listify +from bids.utils import listify, convert_JSON from .entities import NodeIndex from .variables import SparseRunVariable, DenseRunVariable, SimpleVariable from .collections import BIDSRunVariableCollection @@ -554,3 +555,42 @@ def make_patt(x, regex_search=False): node.add_variable(SimpleVariable(name=col_name, data=df, source=suffix)) return dataset + + +def parse_transforms(transforms_in, validate=True,level="run"): + """ Adapted from bids.modeling.statsmodels.BIDSStatsModelsGraph. Also + handles files/jsons that only define the transformations section of the + model.json """ + + # input is JSON as a file or dict + if isinstance(transforms_in, str): + if not Path(transforms_in).exists(): + raise ValueError(f"Cannot find path: {transforms_in}") + with open(transforms_in, 'r', encoding='utf-8') as fobj: + transforms_raw = json.load(fobj) + else: + transforms_raw = transforms_in + + # Convert JSON from CamelCase to snake_case keys + transforms_raw = convert_JSON(transforms_raw) + + if validate: + # TODO + # validate_transforms(transforms_raw) + pass + + # Process transformations + # TODO: some basic error checking to confirm the correct level of + # transformations has been obtained. This will most likely be the case since + # transformations at higher levels will no longer be required when the new + # "flow" approach is used. + if "nodes" in transforms_raw: + nodes_key = "nodes" + elif "steps" in transforms_raw: + nodes_key = "steps" + else: + raise ValueError("Cannot find a key for nodes in the json input representing the model") + transforms = transforms_raw[nodes_key][0]["transformations"] + return transforms + + From a9ae6239a36cab5dfc5cc02f79b3792122dfe55e Mon Sep 17 00:00:00 2001 From: leej3 Date: Fri, 14 May 2021 15:53:27 +0100 Subject: [PATCH 33/37] add additional support for transformation parsing Add tests and add support for: Model jsons with transformations Transformation jsons Python in memory representations of the above --- bids/__init__.py | 1 - bids/variables/io.py | 10 +++++----- bids/variables/tests/test_io.py | 35 +++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/bids/__init__.py b/bids/__init__.py index 3d38ffc26..fb96d2d4b 100644 --- a/bids/__init__.py +++ b/bids/__init__.py @@ -14,7 +14,6 @@ "reports", "utils", "variables", - "statsmodels_design_synthesizer", ] due.cite(Doi("10.1038/sdata.2016.44"), diff --git a/bids/variables/io.py b/bids/variables/io.py index 0a382d8d0..76842583d 100644 --- a/bids/variables/io.py +++ b/bids/variables/io.py @@ -584,13 +584,13 @@ def parse_transforms(transforms_in, validate=True,level="run"): # transformations has been obtained. This will most likely be the case since # transformations at higher levels will no longer be required when the new # "flow" approach is used. - if "nodes" in transforms_raw: - nodes_key = "nodes" - elif "steps" in transforms_raw: - nodes_key = "steps" + if "transformations" in transforms_raw: + transforms = transforms_raw["transformations"] + elif any(k in transforms_raw for k in ["nodes","steps"]): + nodes_key = "nodes" if "nodes" in transforms_raw else "steps" + transforms = transforms_raw[nodes_key][0]["transformations"] else: raise ValueError("Cannot find a key for nodes in the json input representing the model") - transforms = transforms_raw[nodes_key][0]["transformations"] return transforms diff --git a/bids/variables/tests/test_io.py b/bids/variables/tests/test_io.py index c4690ce84..ce2df52a1 100644 --- a/bids/variables/tests/test_io.py +++ b/bids/variables/tests/test_io.py @@ -2,12 +2,21 @@ from bids.variables import (SparseRunVariable, SimpleVariable, DenseRunVariable, load_variables) from bids.variables.entities import Node, RunNode, NodeIndex +from bids.variables.io import parse_transforms from unittest.mock import patch import pytest from os.path import join +from pathlib import Path +import tempfile +import json from bids.tests import get_test_data_path from bids.config import set_option, get_option +EXAMPLE_TRANSFORM = { + "Transformations":[{"Name":"example_trans","Inputs":["col_a","col_b"]}] +} +TRANSFORMS_JSON = join(tempfile.tempdir,"tranformations.json") +Path(TRANSFORMS_JSON).write_text(json.dumps(EXAMPLE_TRANSFORM)) @pytest.fixture def layout1(): @@ -103,3 +112,29 @@ def test_load_synthetic_dataset(synthetic): subs = index.get_nodes('subject') assert len(subs) == 5 assert set(subs[0].variables.keys()) == {'systolic_blood_pressure'} + +@pytest.mark.parametrize( + "test_case,transform_input,expected_names", + [ + ("raw transform json", + EXAMPLE_TRANSFORM, + ["example_trans"] + ), + ("transform json file", + TRANSFORMS_JSON, + ["example_trans"] + ), + ("raw model json", + {"Nodes": [EXAMPLE_TRANSFORM]}, + ["example_trans"] + ), + ("model json file", + str(Path(get_test_data_path()) / "ds005/models/ds-005_type-mfx_model.json"), + ["Scale"] + ), + ] +) +def test_parse_transforms(test_case,transform_input,expected_names): + result = parse_transforms(transform_input) + transformation_names = [x['name'] for x in result] + assert expected_names == transformation_names From 857c5e7d349af0d3cec3415cdaf9b0644ff72190 Mon Sep 17 00:00:00 2001 From: john lee Date: Sat, 29 May 2021 11:39:45 +0100 Subject: [PATCH 34/37] Apply suggestions from code review Correct management of intercept in model file containing convolution.. Make t-test "Test" not type. Co-authored-by: Chris Markiewicz --- .../data/ds005/models/ds-005_type-convolution_model.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bids/tests/data/ds005/models/ds-005_type-convolution_model.json b/bids/tests/data/ds005/models/ds-005_type-convolution_model.json index ff1e77bcb..404123259 100644 --- a/bids/tests/data/ds005/models/ds-005_type-convolution_model.json +++ b/bids/tests/data/ds005/models/ds-005_type-convolution_model.json @@ -14,7 +14,7 @@ "RT", "gain" ], - "Formula": "0 + RT * gain" + "Formula": "1 + RT * gain" }, "Transformations": [ { @@ -43,7 +43,7 @@ } ], "DummyContrasts": { - "Type": "t" + "Test": "t" } }, { @@ -51,7 +51,7 @@ "Level": "Subject", "Model": { "X": [ - "@intercept" + 1 ] }, "DummyContrasts": { From 634481650d04a8d8fe9719ce712c4c5ce3f9373d Mon Sep 17 00:00:00 2001 From: leej3 Date: Sat, 29 May 2021 12:43:27 +0100 Subject: [PATCH 35/37] rename and move to cli Ideally morphing-time should be a subcommand. Moving morphing-time cli test to test_cli requires a little thinking. --- bids/cli.py | 32 +++++++++++++++ ...design_synthesizer.py => morphing_time.py} | 39 +------------------ ...n-synthesizer.py => test_morphing_time.py} | 9 ++--- setup.cfg | 2 +- 4 files changed, 37 insertions(+), 45 deletions(-) rename bids/{statsmodels_design_synthesizer.py => morphing_time.py} (74%) rename bids/tests/{test_statsmodels-design-synthesizer.py => test_morphing_time.py} (90%) diff --git a/bids/cli.py b/bids/cli.py index 4cdc86872..b3fb0d503 100644 --- a/bids/cli.py +++ b/bids/cli.py @@ -4,6 +4,7 @@ from . import __version__ from .layout import BIDSLayoutIndexer, BIDSLayout from .utils import validate_multiple as _validate_multiple +from . import morphing_time as mt # alias -h to trigger help message CONTEXT_SETTINGS = {'help_option_names': ['-h', '--help']} @@ -27,6 +28,37 @@ def cli(): pass +@click.command() +@click.version_option(__version__, prog_name='morphing_time') +@click.option( + "--events-tsv", required=True, help="Path to events TSV") +@click.option( + "--transforms", required=True, help="Path to transform or model json" + ) +@click.option( + "--nvol", required=True, type=int, help="Number of volumes in func time-series" + ) +@click.option( + "--tr", required=True, type=float, help="TR for func time series" + ) +@click.option( + "--ta", required=True, type=float, help="TA for events") +@click.option( + "--output-sampling-rate", + required=False, + type=float, + help="Output sampling rate in Hz when a full design matrix is desired.", + ) +@click.option( + "--output-dir", + required=False, + help="Path to directory to write processed event files.", + ) +def morphing_time(**kwargs): + mt.morphing_time(**kwargs) + + + @cli.command(context_settings=CONTEXT_SETTINGS) @click.argument('root', type=click.Path(file_okay=False, exists=True)) @click.argument('db-path', type=click.Path(file_okay=False, resolve_path=True, exists=True)) diff --git a/bids/statsmodels_design_synthesizer.py b/bids/morphing_time.py similarity index 74% rename from bids/statsmodels_design_synthesizer.py rename to bids/morphing_time.py index af6dffe12..42fd1da97 100755 --- a/bids/statsmodels_design_synthesizer.py +++ b/bids/morphing_time.py @@ -12,42 +12,9 @@ from bids.layout.utils import parse_file_entities from bids.variables.io import get_events_collection, parse_transforms from bids.variables.entities import RunNode -import click -from . import __version__ - -@click.command() -@click.version_option(__version__, prog_name='statsmodels_design_sythesizer') -@click.option( - "--events-tsv", required=True, help="Path to events TSV") -@click.option( - "--transforms", required=True, help="Path to transform or model json" - ) -@click.option( - "--nvol", required=True, type=int, help="Number of volumes in func time-series" - ) -@click.option( - "--tr", required=True, type=float, help="TR for func time series" - ) -@click.option( - "--ta", required=True, type=float, help="TA for events") -@click.option( - "--output-sampling-rate", - required=False, - type=float, - help="Output sampling rate in Hz when a full design matrix is desired.", - ) -@click.option( - "--output-dir", - required=False, - help="Path to directory to write processed event files.", - ) -def main(**kwargs): - statsmodels_design_synthesizer(**kwargs) - - -def statsmodels_design_synthesizer( +def morphing_time( *, events_tsv, transforms, @@ -116,7 +83,3 @@ def statsmodels_design_synthesizer( df_full = colls.to_df(sampling_rate=output_sampling_rate) df_full.to_csv(output_dir / "aggregated_design.tsv", index=None, sep="\t", na_rep="n/a") - - -if __name__ == "__main__": - sys.exit(main()) # pragma: no cover""Main module.""" diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_morphing_time.py similarity index 90% rename from bids/tests/test_statsmodels-design-synthesizer.py rename to bids/tests/test_morphing_time.py index 6ba1cd7e6..600e01ca5 100644 --- a/bids/tests/test_statsmodels-design-synthesizer.py +++ b/bids/tests/test_morphing_time.py @@ -1,17 +1,14 @@ #!/usr/bin/env python -"""Tests for `bids_statsmodels_design_synthesizer` package.""" - import pytest import subprocess as sp from pathlib import Path import tempfile import pandas as pd -SYNTHESIZER = "statsmodels-design-synthesizer" -from bids import statsmodels_design_synthesizer as synth_mod +SYNTHESIZER = "morphing-time" +from bids import morphing_time as synth_mod -# from bids_statsmodels_design_synthesizer import Path(SYNTHESIZER).stem as synth_mod DATA_DIR = (Path(__file__).parent / "data/ds005").absolute() # Define some example user arg combinations (without output_dir which is better @@ -55,7 +52,7 @@ def test_design_aggregation_function(tmp_path,test_case,user_args): def test_design_aggregation_function_with_convolution(tmp_path): EXAMPLE_USER_ARGS_3['output_dir'] = str(tmp_path) - synth_mod.statsmodels_design_synthesizer(**EXAMPLE_USER_ARGS_3) + synth_mod.morphing_time(**EXAMPLE_USER_ARGS_3) sparse_output = pd.read_csv(tmp_path/"transformed_events.tsv", sep='\t') assert 'pos_respcat' in sparse_output.columns assert 'gain' in sparse_output.columns diff --git a/setup.cfg b/setup.cfg index adfc9e72a..a707d4baa 100644 --- a/setup.cfg +++ b/setup.cfg @@ -64,7 +64,7 @@ dev = [options.entry_points] console_scripts = pybids=bids.cli:cli - statsmodels-design-synthesizer=bids.statsmodels_design_synthesizer:main + morphing-time=bids.cli:morphing_time [versioneer] VCS = git From bb47b4cec2ec7d9e923676412e9d2a48da747a1b Mon Sep 17 00:00:00 2001 From: leej3 Date: Sat, 29 May 2021 12:47:44 +0100 Subject: [PATCH 36/37] make ta default to tr --- bids/cli.py | 2 +- bids/morphing_time.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bids/cli.py b/bids/cli.py index b3fb0d503..4e1ba1248 100644 --- a/bids/cli.py +++ b/bids/cli.py @@ -42,7 +42,7 @@ def cli(): "--tr", required=True, type=float, help="TR for func time series" ) @click.option( - "--ta", required=True, type=float, help="TA for events") + "--ta", required=False, type=float, help="TA for events") @click.option( "--output-sampling-rate", required=False, diff --git a/bids/morphing_time.py b/bids/morphing_time.py index 42fd1da97..ecfdab133 100755 --- a/bids/morphing_time.py +++ b/bids/morphing_time.py @@ -20,7 +20,7 @@ def morphing_time( transforms, nvol, tr, - ta, + ta=None, output_sampling_rate=None, output_dir=None, ): @@ -29,6 +29,7 @@ def morphing_time( output_dir.mkdir(exist_ok=True) model_transforms = parse_transforms(transforms) duration = nvol * tr + ta = ta or tr # Get relevant collection coll_df = pd.read_csv(events_tsv, delimiter="\t") From 14391a93e20a82168e6528f57c2b17f056eef0dc Mon Sep 17 00:00:00 2001 From: leej3 Date: Sat, 29 May 2021 13:36:49 +0100 Subject: [PATCH 37/37] improve parsing of transforms_in --- bids/variables/io.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/bids/variables/io.py b/bids/variables/io.py index 882e37e2e..2692381db 100644 --- a/bids/variables/io.py +++ b/bids/variables/io.py @@ -562,12 +562,21 @@ def parse_transforms(transforms_in, validate=True,level="run"): handles files/jsons that only define the transformations section of the model.json """ - # input is JSON as a file or dict + # input is JSON as string, dict, or path if isinstance(transforms_in, str): - if not Path(transforms_in).exists(): - raise ValueError(f"Cannot find path: {transforms_in}") - with open(transforms_in, 'r', encoding='utf-8') as fobj: - transforms_raw = json.load(fobj) + # read as file if file + if Path(transforms_in).exists(): + transforms_in = Path(transforms_in).read_text() + # convert json as string to dict + try: + transforms_raw = json.loads(transforms_in) + except json.JSONDecodeError as err: + raise json.JSONDecodeError(f""" + {transforms_in} + The above input could not be parsed as valid json... + {err} + """ + ) else: transforms_raw = transforms_in