From 27c21bf26b1ab2c6a36cce1ce96764ec457df164 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Tue, 11 May 2021 13:18:14 +0100
Subject: [PATCH 01/37] add initial prototype of design aggregation tool

---
 bids/__init__.py                              |   3 +-
 bids/statsmodels_design_synthesizer.py        | 148 ++++++++++++++++++
 .../test_statsmodels-design-synthesizer.py    |  52 ++++++
 setup.cfg                                     |   1 +
 4 files changed, 203 insertions(+), 1 deletion(-)
 create mode 100755 bids/statsmodels_design_synthesizer.py
 create mode 100644 bids/tests/test_statsmodels-design-synthesizer.py

diff --git a/bids/__init__.py b/bids/__init__.py
index f8e285c1f..3d38ffc26 100644
--- a/bids/__init__.py
+++ b/bids/__init__.py
@@ -13,7 +13,8 @@
     "layout",
     "reports",
     "utils",
-    "variables"
+    "variables",
+    "statsmodels_design_synthesizer",
 ]
 
 due.cite(Doi("10.1038/sdata.2016.44"),
diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
new file mode 100755
index 000000000..a85156f30
--- /dev/null
+++ b/bids/statsmodels_design_synthesizer.py
@@ -0,0 +1,148 @@
+#! /usr/bin/env python
+import argparse
+import sys
+import json
+from pathlib import Path
+import pandas as pd
+import numpy as np
+from collections import namedtuple
+from bids.modeling import transformations
+from bids.utils import convert_JSON
+from bids.variables import BIDSRunVariableCollection, SparseRunVariable
+from bids.layout.utils import parse_file_entities
+
+
+def statsmodels_design_synthesizer(params):
+    """Console script for bids statsmodels_design_synthesizer."""
+
+    # Output:
+    if not params.get("OUTPUT_DIR"):
+        output_tsv = params.get("OUTPUT_TSV", "aggregated_statsmodels_design.tsv")
+
+    # Sampling rate of output
+    sampling_rate_out = params.get("OUTPUT_SAMPLING_RATE")
+
+    # Process transformations file
+    # TODO: add transforms functionality, for now only model.json is handled
+    # TODO: some basic error checking to confirm the correct level of
+    # transformations has been obtained. This will most likely be the case since
+    # transformations at higher levels will no longer be required when the new
+    # "flow" approach is used.
+    transforms_file = Path(params["TRANSFORMS"])
+    if not transforms_file.exists():
+        raise ValueError(f"Cannot find {transforms_file}")
+    model = convert_JSON(json.loads(model_file.read_text()))
+    model_transforms = model["steps"][0]["transformations"]
+
+    # Get relevant collection
+    coll_df = pd.read_csv(params["EVENTS_TSV"], delimiter="\t")
+    RunInfo = namedtuple("RunInfo", ["entities", "duration"])
+    run_info = RunInfo(parse_file_entities(params["EVENTS_TSV"]), params["DURATION"])
+    coll = get_events_collection(coll_df, [run_info])
+
+    # perform transformations
+    colls = transformations.TransformerManager().transform(coll, model_transforms)
+
+    # Save colls
+    df_out = colls.to_df(sampling_rate=sampling_rate_out)
+    df_out.to_csv(output_tsv, index=None, sep="\t", na_rep="n/a")
+
+
+def get_events_collection(_data, run_info, drop_na=True):
+    """ "
+    This is an attempt to minimally implement:
+    https://github.com/bids-standard/pybids/blob/statsmodels/bids/variables/io.py
+    """
+    colls_output = []
+    if "amplitude" in _data.columns:
+        if (
+            _data["amplitude"].astype(int) == 1
+        ).all() and "trial_type" in _data.columns:
+            msg = (
+                "Column 'amplitude' with constant value 1 "
+                "is unnecessary in event files; ignoring it."
+            )
+            _data = _data.drop("amplitude", axis=1)
+        else:
+            msg = "Column name 'amplitude' is reserved; " "renaming it to 'amplitude_'."
+            _data = _data.rename(columns={"amplitude": "amplitude_"})
+            warnings.warn(msg)
+
+    _data = _data.replace("n/a", np.nan)  # Replace BIDS' n/a
+    _data = _data.apply(pd.to_numeric, errors="ignore")
+
+    _cols = list(set(_data.columns.tolist()) - {"onset", "duration"})
+
+    # Construct a DataFrame for each extra column
+    for col in _cols:
+        df = _data[["onset", "duration"]].copy()
+        df["amplitude"] = _data[col].values
+
+        # Add in all of the run's entities as new columns for
+        # index
+        #        for entity, value in entities.items():
+        #            if entity in ALL_ENTITIES:
+        #                df[entity] = value
+        #
+        if drop_na:
+            df = df.dropna(subset=["amplitude"])
+
+        if df.empty:
+            continue
+        var = SparseRunVariable(name=col, data=df, run_info=run_info, source="events")
+        colls_output.append(var)
+
+    output = BIDSRunVariableCollection(colls_output)
+    return output
+
+
+def create_parser():
+    """Returns argument parser"""
+    p = argparse.ArgumentParser()
+    p.add_argument("--events-tsv", required=True, help="Path to events TSV")
+    p.add_argument(
+        "--transforms", required=True, help="Path to transform or model json"
+    )
+    p.add_argument(
+        "--output-sampling-rate",
+        required=False,
+        help="Output sampling rate in Hz when output is dense instead of sparse",
+    )
+
+    pout = p.add_mutually_exclusive_group()
+    pout.add_argument(
+        "--output-tsv",
+        nargs="?",
+        help="Path to TSV containing a fully constructed design matrix.",
+    )
+    pout.add_argument(
+        "--output-dir",
+        nargs="?",
+        help="Path to directory to write processed event files.",
+    )
+
+    ptimes = p.add_argument_group(
+        "Specify some essential details about the time series."
+    )
+    ptimes.add_argument(
+        "--nvol", required=True, help="Number of volumes in func time-series"
+    )
+    ptimes.add_argument("--tr", required=True, help="TR for func time series")
+    ptimes.add_argument("--ta", required=True, help="TA for events")
+
+    return p
+
+
+def main(user_args=None):
+    parser = create_parser()
+    if user_args is None:
+        namespace = parser.parse_args(sys.argv[1:])
+    else:
+        namespace = parser.parse_args(user_args)
+    params = vars(namespace)
+
+    statsmodels_design_synthesizer(params)
+
+
+if __name__ == "__main__":
+    sys.exit(main())  # pragma: no cover""Main module."""
diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py
new file mode 100644
index 000000000..30404bab7
--- /dev/null
+++ b/bids/tests/test_statsmodels-design-synthesizer.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+
+"""Tests for `bids_statsmodels_design_synthesizer` package."""
+
+import pytest
+import subprocess as sp
+from pathlib import Path
+
+SYNTHESIZER = "statsmodels-design-synthesizer"
+from bids.statsmodels_design_synthesizer import statsmodels_design_synthesizer as synth_mod
+
+# from bids_statsmodels_design_synthesizer import Path(SYNTHESIZER).stem as synth_mod
+EXAMPLE_USER_ARGS = {
+        "OUTPUT_TSV": "aggregated_design.tsv",
+        "TRANSFORMS": "data/ds005/models/ds-005_type-mfx_model.json",
+        "EVENTS_TSV": "data/ds005/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv",
+        "TR": 2,
+        "TA": 2,
+        "NVOLS": 160,
+    }
+
+
+def test_cli_help():
+    with pytest.raises(sp.CalledProcessError):
+        output = sp.check_output([SYNTHESIZER, "-h"])
+    with pytest.raises(sp.CalledProcessError):
+        output = sp.check_output([SYNTHESIZER, "--non-existent"])
+
+
+def test_design_aggregation_function():
+    synth_mod.main(EXAMPLE_USER_ARGS)
+
+
+def test_minimal_cli_functionality():
+    """
+    We roughly want to implement the equivalent of the following:
+    from bids.analysis import Analysis
+    from bids.layout import BIDSLayout
+
+    layout = BIDSLayout("data/ds000003")
+    analysis = Analysis(model="data/ds000003/models/model-001_smdl.json",layout=layout)
+    analysis.setup()
+
+    more specifically we want to reimplement this line
+    https://github.com/bids-standard/pybids/blob/b6cd0f6787230ce976a374fbd5fce650865752a3/bids/analysis/analysis.py#L282
+    """
+    bids_dir = Path(__file__).parent / "data/ds000003"
+    model = "model-001_smdl.json"
+    arg_list = " " .join([f"""--{k.lower().replace("_","-")}={v}""" for k,v in EXAMPLE_USER_ARGS.items()])
+    cmd = f"{SYNTHESIZER} {arg_list}"
+    output = sp.check_output(cmd.split())
+
diff --git a/setup.cfg b/setup.cfg
index 2177dc9b9..e2aadbd4c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -65,6 +65,7 @@ dev =
 [options.entry_points]
 console_scripts =
     pybids=bids.cli:cli
+    statsmodels-design-synthesizer=bids.statsmodels_design_synthesizer:main
 
 [versioneer]
 VCS = git

From bc6cd9116be928baca4de356217db356c85ff1e5 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Tue, 11 May 2021 14:50:34 +0100
Subject: [PATCH 02/37] tidy arg parsing

---
 bids/statsmodels_design_synthesizer.py        | 39 ++++++++++++-------
 .../test_statsmodels-design-synthesizer.py    | 19 ++++-----
 2 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index a85156f30..c93b71943 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -16,11 +16,11 @@ def statsmodels_design_synthesizer(params):
     """Console script for bids statsmodels_design_synthesizer."""
 
     # Output:
-    if not params.get("OUTPUT_DIR"):
-        output_tsv = params.get("OUTPUT_TSV", "aggregated_statsmodels_design.tsv")
+    if not params.get("output_dir"):
+        output_tsv = params.get("output_tsv", "aggregated_statsmodels_design.tsv")
 
     # Sampling rate of output
-    sampling_rate_out = params.get("OUTPUT_SAMPLING_RATE")
+    sampling_rate_out = params.get("output_sampling_rate")
 
     # Process transformations file
     # TODO: add transforms functionality, for now only model.json is handled
@@ -28,16 +28,26 @@ def statsmodels_design_synthesizer(params):
     # transformations has been obtained. This will most likely be the case since
     # transformations at higher levels will no longer be required when the new
     # "flow" approach is used.
-    transforms_file = Path(params["TRANSFORMS"])
+    transforms_file = Path(params["transforms"])
     if not transforms_file.exists():
         raise ValueError(f"Cannot find {transforms_file}")
-    model = convert_JSON(json.loads(model_file.read_text()))
-    model_transforms = model["steps"][0]["transformations"]
+    model = convert_JSON(json.loads(transforms_file.read_text()))
+
+    if "nodes" in model:
+        nodes_key = "nodes"
+    elif "steps" in model:
+        nodes_key = "steps"
+    else:
+        raise ValueError("Cannot find a key for nodes in the model file")
+    model_transforms = model[nodes_key][0]["transformations"]
+
+    duration = params["nvol"] * params["tr"]
 
     # Get relevant collection
-    coll_df = pd.read_csv(params["EVENTS_TSV"], delimiter="\t")
+    coll_df = pd.read_csv(params["events_tsv"], delimiter="\t")
     RunInfo = namedtuple("RunInfo", ["entities", "duration"])
-    run_info = RunInfo(parse_file_entities(params["EVENTS_TSV"]), params["DURATION"])
+
+    run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration)
     coll = get_events_collection(coll_df, [run_info])
 
     # perform transformations
@@ -106,6 +116,7 @@ def create_parser():
     p.add_argument(
         "--output-sampling-rate",
         required=False,
+        type=float,
         help="Output sampling rate in Hz when output is dense instead of sparse",
     )
 
@@ -125,10 +136,12 @@ def create_parser():
         "Specify some essential details about the time series."
     )
     ptimes.add_argument(
-        "--nvol", required=True, help="Number of volumes in func time-series"
+        "--nvol", required=True, type=int, help="Number of volumes in func time-series"
+    )
+    ptimes.add_argument(
+        "--tr", required=True, type=float, help="TR for func time series"
     )
-    ptimes.add_argument("--tr", required=True, help="TR for func time series")
-    ptimes.add_argument("--ta", required=True, help="TA for events")
+    ptimes.add_argument("--ta", required=True, type=float, help="TA for events")
 
     return p
 
@@ -137,9 +150,9 @@ def main(user_args=None):
     parser = create_parser()
     if user_args is None:
         namespace = parser.parse_args(sys.argv[1:])
+        params = vars(namespace)
     else:
-        namespace = parser.parse_args(user_args)
-    params = vars(namespace)
+        params = user_args
 
     statsmodels_design_synthesizer(params)
 
diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py
index 30404bab7..b1e794334 100644
--- a/bids/tests/test_statsmodels-design-synthesizer.py
+++ b/bids/tests/test_statsmodels-design-synthesizer.py
@@ -7,22 +7,21 @@
 from pathlib import Path
 
 SYNTHESIZER = "statsmodels-design-synthesizer"
-from bids.statsmodels_design_synthesizer import statsmodels_design_synthesizer as synth_mod
+from bids import statsmodels_design_synthesizer as synth_mod
 
 # from bids_statsmodels_design_synthesizer import Path(SYNTHESIZER).stem as synth_mod
 EXAMPLE_USER_ARGS = {
-        "OUTPUT_TSV": "aggregated_design.tsv",
-        "TRANSFORMS": "data/ds005/models/ds-005_type-mfx_model.json",
-        "EVENTS_TSV": "data/ds005/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv",
-        "TR": 2,
-        "TA": 2,
-        "NVOLS": 160,
+        "output_tsv": "aggregated_design.tsv",
+        "transforms": "data/ds005/models/ds-005_type-mfx_model.json",
+        "events_tsv": "data/ds005/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv",
+        "tr": 2,
+        "ta": 2,
+        "nvol": 160,
     }
 
 
 def test_cli_help():
-    with pytest.raises(sp.CalledProcessError):
-        output = sp.check_output([SYNTHESIZER, "-h"])
+    output = sp.check_output([SYNTHESIZER, "-h"])
     with pytest.raises(sp.CalledProcessError):
         output = sp.check_output([SYNTHESIZER, "--non-existent"])
 
@@ -44,8 +43,6 @@ def test_minimal_cli_functionality():
     more specifically we want to reimplement this line
     https://github.com/bids-standard/pybids/blob/b6cd0f6787230ce976a374fbd5fce650865752a3/bids/analysis/analysis.py#L282
     """
-    bids_dir = Path(__file__).parent / "data/ds000003"
-    model = "model-001_smdl.json"
     arg_list = " " .join([f"""--{k.lower().replace("_","-")}={v}""" for k,v in EXAMPLE_USER_ARGS.items()])
     cmd = f"{SYNTHESIZER} {arg_list}"
     output = sp.check_output(cmd.split())

From bba5284562260afbdf3e74cdc7eed0858df2d452 Mon Sep 17 00:00:00 2001
From: shotgunosine <adenosine@gmail.com>
Date: Tue, 11 May 2021 11:15:28 -0400
Subject: [PATCH 03/37] move get_events_collection to variables.io

---
 bids/statsmodels_design_synthesizer.py |  56 ++-----------
 bids/variables/io.py                   | 105 +++++++++++++++----------
 2 files changed, 69 insertions(+), 92 deletions(-)

diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index c93b71943..bee5c5960 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -10,6 +10,8 @@
 from bids.utils import convert_JSON
 from bids.variables import BIDSRunVariableCollection, SparseRunVariable
 from bids.layout.utils import parse_file_entities
+from bids.variables.io import get_events_collection
+from bids.variables.entities import RunNode
 
 
 def statsmodels_design_synthesizer(params):
@@ -47,8 +49,9 @@ def statsmodels_design_synthesizer(params):
     coll_df = pd.read_csv(params["events_tsv"], delimiter="\t")
     RunInfo = namedtuple("RunInfo", ["entities", "duration"])
 
-    run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration)
-    coll = get_events_collection(coll_df, [run_info])
+    #run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration)
+    run = RunNode(parse_file_entities(params["events_tsv"]), None, duration, params["tr"], params["nvol"])
+    coll = get_events_collection(coll_df, run, output='collection')
 
     # perform transformations
     colls = transformations.TransformerManager().transform(coll, model_transforms)
@@ -57,55 +60,6 @@ def statsmodels_design_synthesizer(params):
     df_out = colls.to_df(sampling_rate=sampling_rate_out)
     df_out.to_csv(output_tsv, index=None, sep="\t", na_rep="n/a")
 
-
-def get_events_collection(_data, run_info, drop_na=True):
-    """ "
-    This is an attempt to minimally implement:
-    https://github.com/bids-standard/pybids/blob/statsmodels/bids/variables/io.py
-    """
-    colls_output = []
-    if "amplitude" in _data.columns:
-        if (
-            _data["amplitude"].astype(int) == 1
-        ).all() and "trial_type" in _data.columns:
-            msg = (
-                "Column 'amplitude' with constant value 1 "
-                "is unnecessary in event files; ignoring it."
-            )
-            _data = _data.drop("amplitude", axis=1)
-        else:
-            msg = "Column name 'amplitude' is reserved; " "renaming it to 'amplitude_'."
-            _data = _data.rename(columns={"amplitude": "amplitude_"})
-            warnings.warn(msg)
-
-    _data = _data.replace("n/a", np.nan)  # Replace BIDS' n/a
-    _data = _data.apply(pd.to_numeric, errors="ignore")
-
-    _cols = list(set(_data.columns.tolist()) - {"onset", "duration"})
-
-    # Construct a DataFrame for each extra column
-    for col in _cols:
-        df = _data[["onset", "duration"]].copy()
-        df["amplitude"] = _data[col].values
-
-        # Add in all of the run's entities as new columns for
-        # index
-        #        for entity, value in entities.items():
-        #            if entity in ALL_ENTITIES:
-        #                df[entity] = value
-        #
-        if drop_na:
-            df = df.dropna(subset=["amplitude"])
-
-        if df.empty:
-            continue
-        var = SparseRunVariable(name=col, data=df, run_info=run_info, source="events")
-        colls_output.append(var)
-
-    output = BIDSRunVariableCollection(colls_output)
-    return output
-
-
 def create_parser():
     """Returns argument parser"""
     p = argparse.ArgumentParser()
diff --git a/bids/variables/io.py b/bids/variables/io.py
index 6c23a0df8..84c9160a2 100644
--- a/bids/variables/io.py
+++ b/bids/variables/io.py
@@ -10,7 +10,7 @@
 from bids.utils import listify
 from .entities import NodeIndex
 from .variables import SparseRunVariable, DenseRunVariable, SimpleVariable
-
+from .collections import BIDSRunVariableCollection
 
 BASE_ENTITIES = ['subject', 'session', 'task', 'run']
 ALL_ENTITIES = BASE_ENTITIES + ['datatype', 'suffix', 'acquisition']
@@ -120,6 +120,68 @@ def _get_nvols(img_f):
 
     return nvols
 
+def get_events_collection(_data, run, entities=None, drop_na=True, output='run', columns=None):
+    """
+    This is an attempt to minimally implement:
+    https://github.com/bids-standard/pybids/blob/statsmodels/bids/variables/io.py
+
+    in a way that will still work for bids io, but will also work without layout.
+    """
+
+    if output == 'collection':
+        colls_output = []
+    elif output != 'run':
+        raise ValueError(f"output must be one of [run, output], {output} was passed.")
+
+    run_info = run.get_info()
+    if entities is None:
+        entities = run_info.entities
+    if 'amplitude' in _data.columns:
+        if (_data['amplitude'].astype(int) == 1).all() and \
+                'trial_type' in _data.columns:
+            msg = ("Column 'amplitude' with constant value 1 "
+                   "is unnecessary in event files; ignoring it.")
+            _data = _data.drop('amplitude', axis=1)
+        else:
+            msg = ("Column name 'amplitude' is reserved; "
+                   "renaming it to 'amplitude_'.")
+            _data = _data.rename(
+                columns={'amplitude': 'amplitude_'})
+        warnings.warn(msg)
+
+    _data = _data.replace('n/a', np.nan)  # Replace BIDS' n/a
+    _data = _data.apply(pd.to_numeric, errors='ignore')
+
+    _cols = columns or list(set(_data.columns.tolist()) -
+                            {'onset', 'duration'})
+
+    # Construct a DataFrame for each extra column
+    for col in _cols:
+        df = _data[['onset', 'duration']].copy()
+        df['amplitude'] = _data[col].values
+
+        # Add in all of the run's entities as new columns for
+        # index
+        for entity, value in entities.items():
+            if entity in ALL_ENTITIES:
+                df[entity] = value
+
+        if drop_na:
+            df = df.dropna(subset=['amplitude'])
+
+        if df.empty:
+            continue
+
+        var = SparseRunVariable(
+            name=col, data=df, run_info=run_info, source='events')
+        if output == 'run':
+            run.add_variable(var)
+        else:
+            colls_output.append(var)
+    if output == 'run':
+        return run
+    else:
+        return BIDSRunVariableCollection(colls_output)
 
 def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
                          drop_na=True, events=True, physio=True, stim=True,
@@ -258,46 +320,7 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
                 img_f, extension='.tsv', suffix='events', all_=True,
                 full_search=True, ignore_strict_entities=['suffix', 'extension'])
             for _data in dfs:
-                _data = pd.read_csv(_data, sep='\t')
-                if 'amplitude' in _data.columns:
-                    if (_data['amplitude'].astype(int) == 1).all() and \
-                            'trial_type' in _data.columns:
-                        msg = ("Column 'amplitude' with constant value 1 "
-                               "is unnecessary in event files; ignoring it.")
-                        _data = _data.drop('amplitude', axis=1)
-                    else:
-                        msg = ("Column name 'amplitude' is reserved; "
-                               "renaming it to 'amplitude_'.")
-                        _data = _data.rename(
-                            columns={'amplitude': 'amplitude_'})
-                    warnings.warn(msg)
-
-                _data = _data.replace('n/a', np.nan)  # Replace BIDS' n/a
-                _data = _data.apply(pd.to_numeric, errors='ignore')
-
-                _cols = columns or list(set(_data.columns.tolist()) -
-                                        {'onset', 'duration'})
-
-                # Construct a DataFrame for each extra column
-                for col in _cols:
-                    df = _data[['onset', 'duration']].copy()
-                    df['amplitude'] = _data[col].values
-
-                    # Add in all of the run's entities as new columns for
-                    # index
-                    for entity, value in entities.items():
-                        if entity in ALL_ENTITIES:
-                            df[entity] = value
-
-                    if drop_na:
-                        df = df.dropna(subset=['amplitude'])
-
-                    if df.empty:
-                        continue
-
-                    var = SparseRunVariable(
-                        name=col, data=df, run_info=run_info, source='events')
-                    run.add_variable(var)
+                run = get_events_collection(_data, run, entities)
 
         # Process confound files
         if regressors:

From 0d2a888d2f92084c163892312cd23696e5606bd6 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Tue, 11 May 2021 20:00:12 +0100
Subject: [PATCH 04/37] fix data path during testing

---
 bids/tests/test_statsmodels-design-synthesizer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py
index b1e794334..1d197c02d 100644
--- a/bids/tests/test_statsmodels-design-synthesizer.py
+++ b/bids/tests/test_statsmodels-design-synthesizer.py
@@ -10,10 +10,11 @@
 from bids import statsmodels_design_synthesizer as synth_mod
 
 # from bids_statsmodels_design_synthesizer import Path(SYNTHESIZER).stem as synth_mod
+DATA_DIR = (Path(__file__).parent / "data/ds005").absolute()
 EXAMPLE_USER_ARGS = {
         "output_tsv": "aggregated_design.tsv",
-        "transforms": "data/ds005/models/ds-005_type-mfx_model.json",
-        "events_tsv": "data/ds005/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv",
+        "transforms": f"{DATA_DIR}/models/ds-005_type-mfx_model.json",
+        "events_tsv": f"{DATA_DIR}/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv",
         "tr": 2,
         "ta": 2,
         "nvol": 160,

From 4e41306d08f47b4b95f56c13dfbe05c390884819 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Tue, 11 May 2021 20:13:34 +0100
Subject: [PATCH 05/37] fix event file reading

---
 bids/variables/io.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bids/variables/io.py b/bids/variables/io.py
index 84c9160a2..5ad1e9bb5 100644
--- a/bids/variables/io.py
+++ b/bids/variables/io.py
@@ -320,6 +320,7 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
                 img_f, extension='.tsv', suffix='events', all_=True,
                 full_search=True, ignore_strict_entities=['suffix', 'extension'])
             for _data in dfs:
+                _data = pd.read_csv(_data, sep='\t')
                 run = get_events_collection(_data, run, entities)
 
         # Process confound files

From 14d32dfd77fe57474c2a75351afd82429ce75fe7 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Wed, 12 May 2021 11:15:56 +0100
Subject: [PATCH 06/37] tidy get_events_collection

---
 bids/variables/io.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/bids/variables/io.py b/bids/variables/io.py
index 5ad1e9bb5..91444c0fd 100644
--- a/bids/variables/io.py
+++ b/bids/variables/io.py
@@ -120,7 +120,7 @@ def _get_nvols(img_f):
 
     return nvols
 
-def get_events_collection(_data, run, entities=None, drop_na=True, output='run', columns=None):
+def get_events_collection(_data, run, drop_na=True, columns=None, entities=None, output='run'):
     """
     This is an attempt to minimally implement:
     https://github.com/bids-standard/pybids/blob/statsmodels/bids/variables/io.py
@@ -316,12 +316,12 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
 
         # Process event files
         if events:
-            dfs = layout.get_nearest(
+            efiles = layout.get_nearest(
                 img_f, extension='.tsv', suffix='events', all_=True,
                 full_search=True, ignore_strict_entities=['suffix', 'extension'])
-            for _data in dfs:
-                _data = pd.read_csv(_data, sep='\t')
-                run = get_events_collection(_data, run, entities)
+            for ef in efiles:
+                _data = pd.read_csv(ef, sep='\t')
+                run = get_events_collection(_data, run, drop_na=drop_na, columns=columns)
 
         # Process confound files
         if regressors:

From 0fb031bf8d9b54c40c5a1c4ff9578767930d99a1 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Wed, 12 May 2021 11:17:51 +0100
Subject: [PATCH 07/37] move loading of regressor into function

---
 bids/statsmodels_design_synthesizer.py |  3 ++
 bids/variables/io.py                   | 46 +++++++++++++++++++++-----
 2 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index bee5c5960..fbde2a433 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -25,6 +25,7 @@ def statsmodels_design_synthesizer(params):
     sampling_rate_out = params.get("output_sampling_rate")
 
     # Process transformations file
+    # TODO: abstact transforms file reading into a function.
     # TODO: add transforms functionality, for now only model.json is handled
     # TODO: some basic error checking to confirm the correct level of
     # transformations has been obtained. This will most likely be the case since
@@ -50,6 +51,8 @@ def statsmodels_design_synthesizer(params):
     RunInfo = namedtuple("RunInfo", ["entities", "duration"])
 
     #run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration)
+    # TODO: this will need to be implemented without RunNode to break cyclic
+    # dependencies if transformations is to be extracted
     run = RunNode(parse_file_entities(params["events_tsv"]), None, duration, params["tr"], params["nvol"])
     coll = get_events_collection(coll_df, run, output='collection')
 
diff --git a/bids/variables/io.py b/bids/variables/io.py
index 91444c0fd..3d9895d9d 100644
--- a/bids/variables/io.py
+++ b/bids/variables/io.py
@@ -183,6 +183,41 @@ def get_events_collection(_data, run, drop_na=True, columns=None, entities=None,
     else:
         return BIDSRunVariableCollection(colls_output)
 
+
+def get_regressors_collection(_data, run, columns=None, entities=None, output='run'):
+
+    if output == 'collection':
+        colls_output = []
+    elif output != 'run':
+        raise ValueError(f"output must be one of [run, output], {output} was passed.")
+
+    run_info = run.get_info()
+    if entities is None:
+        entities = run_info.entities
+ 
+    if columns is not None:
+        conf_cols = list(set(_data.columns) & set(columns))
+        _data = _data.loc[:, conf_cols]
+    for col in _data.columns:
+        # TODO: output sampling rate should likely be used
+        sr = 1. / run.repetition_time
+        var = DenseRunVariable(name=col, values=_data[[col]],
+                       run_info=run_info, source='regressors',
+                       sampling_rate=sr)
+
+        # TODO: this logic can be simplified. Can always append to a list and
+        # then add to the output object.
+        if output == 'run':
+            run.add_variable(var)
+        else:
+            colls_output.append(var)
+    if output == 'run':
+        return run
+    else:
+        return BIDSRunVariableCollection(colls_output)
+
+
+
 def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
                          drop_na=True, events=True, physio=True, stim=True,
                          regressors=True, skip_empty=True, scope='all',
@@ -331,15 +366,8 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
                                         **sub_ents)
             for cf in confound_files:
                 _data = pd.read_csv(cf.path, sep='\t', na_values='n/a')
-                if columns is not None:
-                    conf_cols = list(set(_data.columns) & set(columns))
-                    _data = _data.loc[:, conf_cols]
-                for col in _data.columns:
-                    sr = 1. / run.repetition_time
-                    var = DenseRunVariable(name=col, values=_data[[col]],
-                                           run_info=run_info, source='regressors',
-                                           sampling_rate=sr)
-                    run.add_variable(var)
+                run = get_regressors_collection(_data, run, columns=columns)
+
 
         # Process recordinging files
         rec_types = []

From 8ba2902c2174850009003461036379541ca66a97 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Wed, 12 May 2021 12:37:23 +0100
Subject: [PATCH 08/37] move loading of phys and stim files into function

---
 bids/variables/io.py | 114 +++++++++++++++++++++++++++----------------
 1 file changed, 72 insertions(+), 42 deletions(-)

diff --git a/bids/variables/io.py b/bids/variables/io.py
index 3d9895d9d..7124c12fe 100644
--- a/bids/variables/io.py
+++ b/bids/variables/io.py
@@ -185,7 +185,7 @@ def get_events_collection(_data, run, drop_na=True, columns=None, entities=None,
 
 
 def get_regressors_collection(_data, run, columns=None, entities=None, output='run'):
-
+    # TODO: is drop na functionality required?
     if output == 'collection':
         colls_output = []
     elif output != 'run':
@@ -194,7 +194,7 @@ def get_regressors_collection(_data, run, columns=None, entities=None, output='r
     run_info = run.get_info()
     if entities is None:
         entities = run_info.entities
- 
+
     if columns is not None:
         conf_cols = list(set(_data.columns) & set(columns))
         _data = _data.loc[:, conf_cols]
@@ -217,6 +217,73 @@ def get_regressors_collection(_data, run, columns=None, entities=None, output='r
         return BIDSRunVariableCollection(colls_output)
 
 
+def get_rec_collection(rec_file,run,metadata,run_info=None,columns=None,entities=None, output='run'):
+
+    if output == 'collection':
+        colls_output = []
+    elif output != 'run':
+        raise ValueError(f"output must be one of [run, output], {output} was passed.")
+
+    data = pd.read_csv(rec_file, sep='\t')
+    if output == 'collection':
+        colls_output = []
+    elif output != 'run':
+        raise ValueError(f"output must be one of [run, output], {output} was passed.")
+
+    if not run_info:
+        run_info = run.get_info()
+
+    freq = metadata['SamplingFrequency']
+    st = metadata['StartTime']
+    rf_cols = metadata['Columns']
+    data.columns = rf_cols
+
+    # Filter columns if user passed names
+    if columns is not None:
+        rf_cols = list(set(rf_cols) & set(columns))
+        data = data.loc[:, rf_cols]
+
+    n_cols = len(rf_cols)
+    if not n_cols:
+        # nothing to do
+        return run
+
+    # Keep only in-scan samples
+    if st < 0:
+        start_ind = np.floor(-st * freq)
+        values = data.values[start_ind:, :]
+    else:
+        values = data.values
+
+    if st > 0:
+        n_pad = int(freq * st)
+        pad = np.zeros((n_pad, n_cols))
+        values = np.r_[pad, values]
+
+    n_rows = int(run.duration * freq)
+    if len(values) > n_rows:
+        values = values[:n_rows, :]
+    elif len(values) < n_rows:
+        pad = np.zeros((n_rows - len(values), n_cols))
+        values = np.r_[values, pad]
+
+    df = pd.DataFrame(values, columns=rf_cols)
+    source = 'physio' if '_physio.tsv' in rec_file else 'stim'
+    for col in df.columns:
+        var = DenseRunVariable(name=col, values=df[[col]], run_info=run_info,
+                               source=source, sampling_rate=freq)
+         # TODO: this logic can be simplified. Can always append to a list and
+        # then add to the output object.
+        if output == 'run':
+            run.add_variable(var)
+        else:
+            colls_output.append(var)
+    if output == 'run':
+        return run
+    else:
+        return BIDSRunVariableCollection(colls_output)
+
+
 
 def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
                          drop_na=True, events=True, physio=True, stim=True,
@@ -384,46 +451,9 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
                 metadata = layout.get_metadata(rf)
                 if not metadata:
                     raise ValueError("No .json sidecar found for '%s'." % rf)
-                data = pd.read_csv(rf, sep='\t')
-                freq = metadata['SamplingFrequency']
-                st = metadata['StartTime']
-                rf_cols = metadata['Columns']
-                data.columns = rf_cols
-
-                # Filter columns if user passed names
-                if columns is not None:
-                    rf_cols = list(set(rf_cols) & set(columns))
-                    data = data.loc[:, rf_cols]
-
-                n_cols = len(rf_cols)
-                if not n_cols:
-                    continue
-
-                # Keep only in-scan samples
-                if st < 0:
-                    start_ind = np.floor(-st * freq)
-                    values = data.values[start_ind:, :]
-                else:
-                    values = data.values
-
-                if st > 0:
-                    n_pad = int(freq * st)
-                    pad = np.zeros((n_pad, n_cols))
-                    values = np.r_[pad, values]
-
-                n_rows = int(run.duration * freq)
-                if len(values) > n_rows:
-                    values = values[:n_rows, :]
-                elif len(values) < n_rows:
-                    pad = np.zeros((n_rows - len(values), n_cols))
-                    values = np.r_[values, pad]
-
-                df = pd.DataFrame(values, columns=rf_cols)
-                source = 'physio' if '_physio.tsv' in rf else 'stim'
-                for col in df.columns:
-                    var = DenseRunVariable(name=col, values=df[[col]], run_info=run_info,
-                                           source=source, sampling_rate=freq)
-                    run.add_variable(var)
+                # rec_file passed in for now because rec_type needs to be inferred
+                run = get_rec_collection(rf, run, metadata, run_info=run_info, columns=columns)
+
     return dataset
 
 

From 605c2c09893843557c86514c4b3dd34b648920cc Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Wed, 12 May 2021 13:53:42 +0100
Subject: [PATCH 09/37] output sampling rate not needed for reading input

---
 bids/variables/io.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bids/variables/io.py b/bids/variables/io.py
index 7124c12fe..f3ac516a0 100644
--- a/bids/variables/io.py
+++ b/bids/variables/io.py
@@ -199,7 +199,6 @@ def get_regressors_collection(_data, run, columns=None, entities=None, output='r
         conf_cols = list(set(_data.columns) & set(columns))
         _data = _data.loc[:, conf_cols]
     for col in _data.columns:
-        # TODO: output sampling rate should likely be used
         sr = 1. / run.repetition_time
         var = DenseRunVariable(name=col, values=_data[[col]],
                        run_info=run_info, source='regressors',

From b115bc03fe32561678deaa429950fcd30d812fbe Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Wed, 12 May 2021 14:04:51 +0100
Subject: [PATCH 10/37] move source logic out of get_rec_collection

---
 bids/variables/io.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/bids/variables/io.py b/bids/variables/io.py
index f3ac516a0..4ce0e3cbd 100644
--- a/bids/variables/io.py
+++ b/bids/variables/io.py
@@ -216,14 +216,13 @@ def get_regressors_collection(_data, run, columns=None, entities=None, output='r
         return BIDSRunVariableCollection(colls_output)
 
 
-def get_rec_collection(rec_file,run,metadata,run_info=None,columns=None,entities=None, output='run'):
+def get_rec_collection(data,run,metadata,source,run_info=None,columns=None,entities=None, output='run'):
 
     if output == 'collection':
         colls_output = []
     elif output != 'run':
         raise ValueError(f"output must be one of [run, output], {output} was passed.")
 
-    data = pd.read_csv(rec_file, sep='\t')
     if output == 'collection':
         colls_output = []
     elif output != 'run':
@@ -267,7 +266,6 @@ def get_rec_collection(rec_file,run,metadata,run_info=None,columns=None,entities
         values = np.r_[values, pad]
 
     df = pd.DataFrame(values, columns=rf_cols)
-    source = 'physio' if '_physio.tsv' in rec_file else 'stim'
     for col in df.columns:
         var = DenseRunVariable(name=col, values=df[[col]], run_info=run_info,
                                source=source, sampling_rate=freq)
@@ -451,7 +449,15 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
                 if not metadata:
                     raise ValueError("No .json sidecar found for '%s'." % rf)
                 # rec_file passed in for now because rec_type needs to be inferred
-                run = get_rec_collection(rf, run, metadata, run_info=run_info, columns=columns)
+                source = 'physio' if '_physio.tsv' in rf else 'stim'
+                data = pd.read_csv(rf, sep='\t')
+                run = get_rec_collection(
+                                         data,
+                                         run,
+                                         metadata,
+                                         source,
+                                         run_info=run_info,
+                                         columns=columns)
 
     return dataset
 

From 63f84a56ac65458158146e74b3a484c2b0309a6b Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Wed, 12 May 2021 14:07:14 +0100
Subject: [PATCH 11/37] will not drop na in records or reg collections for now

---
 bids/variables/io.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bids/variables/io.py b/bids/variables/io.py
index 4ce0e3cbd..d69af8d2a 100644
--- a/bids/variables/io.py
+++ b/bids/variables/io.py
@@ -185,7 +185,6 @@ def get_events_collection(_data, run, drop_na=True, columns=None, entities=None,
 
 
 def get_regressors_collection(_data, run, columns=None, entities=None, output='run'):
-    # TODO: is drop na functionality required?
     if output == 'collection':
         colls_output = []
     elif output != 'run':

From 1ee5de1a7dde98f4174ba7e5e13916e8bc669671 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Wed, 12 May 2021 14:59:39 +0100
Subject: [PATCH 12/37] use tempdir for output during test

---
 bids/tests/test_statsmodels-design-synthesizer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py
index 1d197c02d..f33e53bdf 100644
--- a/bids/tests/test_statsmodels-design-synthesizer.py
+++ b/bids/tests/test_statsmodels-design-synthesizer.py
@@ -5,6 +5,7 @@
 import pytest
 import subprocess as sp
 from pathlib import Path
+import tempfile
 
 SYNTHESIZER = "statsmodels-design-synthesizer"
 from bids import statsmodels_design_synthesizer as synth_mod
@@ -12,7 +13,7 @@
 # from bids_statsmodels_design_synthesizer import Path(SYNTHESIZER).stem as synth_mod
 DATA_DIR = (Path(__file__).parent / "data/ds005").absolute()
 EXAMPLE_USER_ARGS = {
-        "output_tsv": "aggregated_design.tsv",
+        "output_dir": tempfile.TemporaryDirectory().name,
         "transforms": f"{DATA_DIR}/models/ds-005_type-mfx_model.json",
         "events_tsv": f"{DATA_DIR}/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv",
         "tr": 2,

From 31776fa28af4b2fce6678e2d29537c02df24e385 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Wed, 12 May 2021 15:00:30 +0100
Subject: [PATCH 13/37] remove output-tsv arg and start sparse/dense saving

---
 bids/statsmodels_design_synthesizer.py | 32 ++++++++++++--------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index fbde2a433..744cc132a 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -17,10 +17,6 @@
 def statsmodels_design_synthesizer(params):
     """Console script for bids statsmodels_design_synthesizer."""
 
-    # Output:
-    if not params.get("output_dir"):
-        output_tsv = params.get("output_tsv", "aggregated_statsmodels_design.tsv")
-
     # Sampling rate of output
     sampling_rate_out = params.get("output_sampling_rate")
 
@@ -57,11 +53,19 @@ def statsmodels_design_synthesizer(params):
     coll = get_events_collection(coll_df, run, output='collection')
 
     # perform transformations
-    colls = transformations.TransformerManager().transform(coll, model_transforms)
+    colls, colls_pre_densifification = transformations.TransformerManager(save_pre_dense=True).transform(coll, model_transforms)
+
+    # Save sparse vars
+    df_sparse = colls_pre_densifification.to_df(include_dense=False)
+    df_sparse.to_csv(output_dir / "transformed_events.tsv", index=None, sep="\t", na_rep="n/a")
+    # Save dense vars
+    df_dense = colls.to_df(include_sparse=False)
+    df_out.to_csv(output_dir / "transformed_time_series.tsv", index=None, sep="\t", na_rep="n/a")
 
-    # Save colls
-    df_out = colls.to_df(sampling_rate=sampling_rate_out)
-    df_out.to_csv(output_tsv, index=None, sep="\t", na_rep="n/a")
+    # Save full design_matrix
+    if sampling_rate_out:
+        df_full = colls.to_df(sampling_rate=sampling_rate_out)
+        df_out.to_csv(output_dir / "aggregated_design.tsv", index=None, sep="\t", na_rep="n/a")
 
 def create_parser():
     """Returns argument parser"""
@@ -74,18 +78,12 @@ def create_parser():
         "--output-sampling-rate",
         required=False,
         type=float,
-        help="Output sampling rate in Hz when output is dense instead of sparse",
+        help="Output sampling rate in Hz when a full design matrix is desired.",
     )
 
-    pout = p.add_mutually_exclusive_group()
-    pout.add_argument(
-        "--output-tsv",
-        nargs="?",
-        help="Path to TSV containing a fully constructed design matrix.",
-    )
-    pout.add_argument(
+    p.add_argument(
         "--output-dir",
-        nargs="?",
+        required=True,
         help="Path to directory to write processed event files.",
     )
 

From bc39cd6ecfe203043cd286c39ecb4e55dd33267e Mon Sep 17 00:00:00 2001
From: shotgunosine <adenosine@gmail.com>
Date: Wed, 12 May 2021 16:11:55 -0400
Subject: [PATCH 14/37] have tfm manager check for densification or deletion of
 sparse variables

---
 bids/modeling/transformations/base.py         | 35 ++++++++++++++++---
 bids/statsmodels_design_synthesizer.py        | 14 ++++++--
 .../test_statsmodels-design-synthesizer.py    | 14 +++++++-
 3 files changed, 55 insertions(+), 8 deletions(-)

diff --git a/bids/modeling/transformations/base.py b/bids/modeling/transformations/base.py
index cc60e6873..a4d171851 100644
--- a/bids/modeling/transformations/base.py
+++ b/bids/modeling/transformations/base.py
@@ -11,10 +11,9 @@
 import pandas as pd
 
 from bids.utils import listify, convert_JSON
-from bids.variables import SparseRunVariable
+from bids.variables import SparseRunVariable, BIDSRunVariableCollection
 from bids.modeling import transformations as pbt
 
-
 class Transformation(metaclass=ABCMeta):
 
     ### Class-level settings ###
@@ -405,13 +404,13 @@ class TransformerManager(object):
             If None, the PyBIDS transformations module is used.
     """
 
-    def __init__(self, default=None):
+    def __init__(self, default=None, save_pre_dense=False):
         self.transformations = {}
         if default is None:
             # Default to PyBIDS transformations
             default = pbt
         self.default = default
-
+        self.save_pre_dense = save_pre_dense
     def _sanitize_name(self, name):
         """ Replace any invalid/reserved transformation names with acceptable
         equivalents.
@@ -448,6 +447,7 @@ def transform(self, collection, transformations):
         transformations : list
             List of transformations to apply.
         """
+        changed_vars = []
         for t in transformations:
             t = convert_JSON(t) # make sure all keys are snake case
             kwargs = dict(t)
@@ -456,11 +456,38 @@ def transform(self, collection, transformations):
 
             # Check registered transformations; fall back on default module
             func = self.transformations.get(name, None)
+            pre_dense = {}
             if func is None:
                 if not hasattr(self.default, name):
                     raise ValueError("No transformation '%s' found: either "
                                      "explicitly register a handler, or pass a"
                                      " default module that supports it." % name)
                 func = getattr(self.default, name)
+                # check for sparse variables here and save them
+                matching_sparse_cols = []
+                if self.save_pre_dense:
+                    for variable in collection.match_variables(cols, return_type='variable'):
+                        if isinstance(variable, SparseRunVariable):
+                            matching_sparse_cols.append(variable.clone())
+
                 func(collection, cols, **kwargs)
+
+                # check here to see if those variables are still sparse
+                # if so, continue, if not, save the sparse variables prior to transformation
+                if len(matching_sparse_cols) > 0:
+                    for variable in matching_sparse_cols:
+                        name = variable.name
+                        matching_post_tfm = collection.match_variables(name, return_type='variable')
+                        assert len(matching_post_tfm) < 2
+                        if (len(matching_post_tfm) == 0) or not isinstance(matching_post_tfm[0], SparseRunVariable):
+                            changed_vars.append(variable)
+
+        if self.save_pre_dense:
+            if len(changed_vars) > 0:
+                changed_vars = BIDSRunVariableCollection(changed_vars)
+                assert np.all([isinstance(vv, SparseRunVariable) for vv in changed_vars.variables.values()])
+                return collection, changed_vars
+            else:
+                return collection, None
         return collection
+
diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index 744cc132a..f1c9a9b7c 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -19,6 +19,8 @@ def statsmodels_design_synthesizer(params):
 
     # Sampling rate of output
     sampling_rate_out = params.get("output_sampling_rate")
+    output_dir = Path(params.get("output_dir", 'design_synthesizer'))
+    output_dir.mkdir(exist_ok=True) 
 
     # Process transformations file
     # TODO: abstact transforms file reading into a function.
@@ -56,11 +58,17 @@ def statsmodels_design_synthesizer(params):
     colls, colls_pre_densifification = transformations.TransformerManager(save_pre_dense=True).transform(coll, model_transforms)
 
     # Save sparse vars
-    df_sparse = colls_pre_densifification.to_df(include_dense=False)
+    try:
+        df_sparse = colls_pre_densifification.to_df(include_dense=False)
+    except AttributeError:
+        df_sparse = colls.to_df(include_dense=False)
     df_sparse.to_csv(output_dir / "transformed_events.tsv", index=None, sep="\t", na_rep="n/a")
     # Save dense vars
-    df_dense = colls.to_df(include_sparse=False)
-    df_out.to_csv(output_dir / "transformed_time_series.tsv", index=None, sep="\t", na_rep="n/a")
+    try:
+        df_dense = colls.to_df(include_sparse=False)
+        df_out.to_csv(output_dir / "transformed_time_series.tsv", index=None, sep="\t", na_rep="n/a")
+    except ValueError:
+        pass
 
     # Save full design_matrix
     if sampling_rate_out:
diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py
index f33e53bdf..e4e43ccfd 100644
--- a/bids/tests/test_statsmodels-design-synthesizer.py
+++ b/bids/tests/test_statsmodels-design-synthesizer.py
@@ -20,7 +20,14 @@
         "ta": 2,
         "nvol": 160,
     }
-
+EXAMPLE_USER_ARGS_2 = {
+        "output_dir": tempfile.TemporaryDirectory().name,
+        "transforms": f"{DATA_DIR}/models/ds-005_type-test_model.json",
+        "events_tsv": f"{DATA_DIR}/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv",
+        "tr": 2,
+        "ta": 2,
+        "nvol": 160,
+    }
 
 def test_cli_help():
     output = sp.check_output([SYNTHESIZER, "-h"])
@@ -30,6 +37,7 @@ def test_cli_help():
 
 def test_design_aggregation_function():
     synth_mod.main(EXAMPLE_USER_ARGS)
+    synth_mod.main(EXAMPLE_USER_ARGS_2)
 
 
 def test_minimal_cli_functionality():
@@ -49,3 +57,7 @@ def test_minimal_cli_functionality():
     cmd = f"{SYNTHESIZER} {arg_list}"
     output = sp.check_output(cmd.split())
 
+    arg_list = " " .join([f"""--{k.lower().replace("_","-")}={v}""" for k,v in EXAMPLE_USER_ARGS_2.items()])
+    cmd = f"{SYNTHESIZER} {arg_list}"
+    output = sp.check_output(cmd.split())
+

From 044386bbd1765045bb5770eabde3a6cbac3e6b3c Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Thu, 13 May 2021 08:16:04 +0100
Subject: [PATCH 15/37] parametrize tests

---
 .../test_statsmodels-design-synthesizer.py    | 25 +++++++++++++------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py
index e4e43ccfd..245a3f614 100644
--- a/bids/tests/test_statsmodels-design-synthesizer.py
+++ b/bids/tests/test_statsmodels-design-synthesizer.py
@@ -35,12 +35,24 @@ def test_cli_help():
         output = sp.check_output([SYNTHESIZER, "--non-existent"])
 
 
-def test_design_aggregation_function():
+@pytest.mark.parametrize(
+    "test_case,user_args",
+    [
+        ("Model type test", EXAMPLE_USER_ARGS),
+        ("Model type mfx", EXAMPLE_USER_ARGS_2),
+    ]
+)
+def test_design_aggregation_function(test_case,user_args):
     synth_mod.main(EXAMPLE_USER_ARGS)
-    synth_mod.main(EXAMPLE_USER_ARGS_2)
 
-
-def test_minimal_cli_functionality():
+@pytest.mark.parametrize(
+    "test_case,user_args",
+    [
+        ("Model type test", EXAMPLE_USER_ARGS),
+        ("Model type mfx", EXAMPLE_USER_ARGS_2),
+    ]
+)
+def test_minimal_cli_functionality(test_case,user_args):
     """
     We roughly want to implement the equivalent of the following:
     from bids.analysis import Analysis
@@ -53,11 +65,8 @@ def test_minimal_cli_functionality():
     more specifically we want to reimplement this line
     https://github.com/bids-standard/pybids/blob/b6cd0f6787230ce976a374fbd5fce650865752a3/bids/analysis/analysis.py#L282
     """
-    arg_list = " " .join([f"""--{k.lower().replace("_","-")}={v}""" for k,v in EXAMPLE_USER_ARGS.items()])
+    arg_list = " " .join([f"""--{k.lower().replace("_","-")}={v}""" for k,v in user_args.items()])
     cmd = f"{SYNTHESIZER} {arg_list}"
     output = sp.check_output(cmd.split())
 
-    arg_list = " " .join([f"""--{k.lower().replace("_","-")}={v}""" for k,v in EXAMPLE_USER_ARGS_2.items()])
-    cmd = f"{SYNTHESIZER} {arg_list}"
-    output = sp.check_output(cmd.split())
 

From ae83df93d1c6e5c4615c559731d5fa71a26f706b Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Thu, 13 May 2021 08:25:09 +0100
Subject: [PATCH 16/37] remove stutter

---
 bids/statsmodels_design_synthesizer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index f1c9a9b7c..79fdb14a0 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -55,11 +55,11 @@ def statsmodels_design_synthesizer(params):
     coll = get_events_collection(coll_df, run, output='collection')
 
     # perform transformations
-    colls, colls_pre_densifification = transformations.TransformerManager(save_pre_dense=True).transform(coll, model_transforms)
+    colls, colls_pre_densification = transformations.TransformerManager(save_pre_dense=True).transform(coll, model_transforms)
 
     # Save sparse vars
     try:
-        df_sparse = colls_pre_densifification.to_df(include_dense=False)
+        df_sparse = colls_pre_densification.to_df(include_dense=False)
     except AttributeError:
         df_sparse = colls.to_df(include_dense=False)
     df_sparse.to_csv(output_dir / "transformed_events.tsv", index=None, sep="\t", na_rep="n/a")

From a8fb9237de1995ffdeab607bc1e4bd7e115a2082 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Thu, 13 May 2021 09:02:14 +0100
Subject: [PATCH 17/37] add test for sampling rate with associated fix

---
 bids/statsmodels_design_synthesizer.py            | 4 ++--
 bids/tests/test_statsmodels-design-synthesizer.py | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index 79fdb14a0..7e54aa280 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -66,14 +66,14 @@ def statsmodels_design_synthesizer(params):
     # Save dense vars
     try:
         df_dense = colls.to_df(include_sparse=False)
-        df_out.to_csv(output_dir / "transformed_time_series.tsv", index=None, sep="\t", na_rep="n/a")
+        df_dense.to_csv(output_dir / "transformed_time_series.tsv", index=None, sep="\t", na_rep="n/a")
     except ValueError:
         pass
 
     # Save full design_matrix
     if sampling_rate_out:
         df_full = colls.to_df(sampling_rate=sampling_rate_out)
-        df_out.to_csv(output_dir / "aggregated_design.tsv", index=None, sep="\t", na_rep="n/a")
+        df_full.to_csv(output_dir / "aggregated_design.tsv", index=None, sep="\t", na_rep="n/a")
 
 def create_parser():
     """Returns argument parser"""
diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py
index 245a3f614..6fba5ef83 100644
--- a/bids/tests/test_statsmodels-design-synthesizer.py
+++ b/bids/tests/test_statsmodels-design-synthesizer.py
@@ -27,6 +27,7 @@
         "tr": 2,
         "ta": 2,
         "nvol": 160,
+        "output_sampling_rate":10,
     }
 
 def test_cli_help():

From ce7a50b00004bb56af6aaa93bf62e8fedd715856 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Thu, 13 May 2021 09:10:29 +0100
Subject: [PATCH 18/37] move test output to the pytest temp dir

---
 bids/tests/test_statsmodels-design-synthesizer.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py
index 6fba5ef83..b68320525 100644
--- a/bids/tests/test_statsmodels-design-synthesizer.py
+++ b/bids/tests/test_statsmodels-design-synthesizer.py
@@ -12,8 +12,10 @@
 
 # from bids_statsmodels_design_synthesizer import Path(SYNTHESIZER).stem as synth_mod
 DATA_DIR = (Path(__file__).parent / "data/ds005").absolute()
+
+# Define some example user arg combinations (without output_dir which is better
+# to define in the scope of the test)
 EXAMPLE_USER_ARGS = {
-        "output_dir": tempfile.TemporaryDirectory().name,
         "transforms": f"{DATA_DIR}/models/ds-005_type-mfx_model.json",
         "events_tsv": f"{DATA_DIR}/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv",
         "tr": 2,
@@ -21,7 +23,6 @@
         "nvol": 160,
     }
 EXAMPLE_USER_ARGS_2 = {
-        "output_dir": tempfile.TemporaryDirectory().name,
         "transforms": f"{DATA_DIR}/models/ds-005_type-test_model.json",
         "events_tsv": f"{DATA_DIR}/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv",
         "tr": 2,
@@ -43,7 +44,8 @@ def test_cli_help():
         ("Model type mfx", EXAMPLE_USER_ARGS_2),
     ]
 )
-def test_design_aggregation_function(test_case,user_args):
+def test_design_aggregation_function(tmp_path,test_case,user_args):
+    user_args['output_dir'] = str(tmp_path)
     synth_mod.main(EXAMPLE_USER_ARGS)
 
 @pytest.mark.parametrize(
@@ -53,7 +55,7 @@ def test_design_aggregation_function(test_case,user_args):
         ("Model type mfx", EXAMPLE_USER_ARGS_2),
     ]
 )
-def test_minimal_cli_functionality(test_case,user_args):
+def test_minimal_cli_functionality(tmp_path,test_case,user_args):
     """
     We roughly want to implement the equivalent of the following:
     from bids.analysis import Analysis
@@ -66,6 +68,7 @@ def test_minimal_cli_functionality(test_case,user_args):
     more specifically we want to reimplement this line
     https://github.com/bids-standard/pybids/blob/b6cd0f6787230ce976a374fbd5fce650865752a3/bids/analysis/analysis.py#L282
     """
+    user_args['output_dir'] = str(tmp_path)
     arg_list = " " .join([f"""--{k.lower().replace("_","-")}={v}""" for k,v in user_args.items()])
     cmd = f"{SYNTHESIZER} {arg_list}"
     output = sp.check_output(cmd.split())

From 810f29ee8800482f61080e44fa2a036bd719c4b4 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Thu, 13 May 2021 09:43:31 +0100
Subject: [PATCH 19/37] fix test to pass parametrized user_args to main

---
 bids/tests/test_statsmodels-design-synthesizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py
index b68320525..e7dd6b49f 100644
--- a/bids/tests/test_statsmodels-design-synthesizer.py
+++ b/bids/tests/test_statsmodels-design-synthesizer.py
@@ -46,7 +46,7 @@ def test_cli_help():
 )
 def test_design_aggregation_function(tmp_path,test_case,user_args):
     user_args['output_dir'] = str(tmp_path)
-    synth_mod.main(EXAMPLE_USER_ARGS)
+    synth_mod.main(user_args)
 
 @pytest.mark.parametrize(
     "test_case,user_args",

From 76c0c5476152a77229c951cc0aede1500ca2dfd0 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Thu, 13 May 2021 09:43:56 +0100
Subject: [PATCH 20/37] consider the sparse variables

---
 bids/statsmodels_design_synthesizer.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index 7e54aa280..8eaa03ba3 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -58,6 +58,10 @@ def statsmodels_design_synthesizer(params):
     colls, colls_pre_densification = transformations.TransformerManager(save_pre_dense=True).transform(coll, model_transforms)
 
     # Save sparse vars
+    # TODO: consider cases where dense/sparse changes from transformation but
+    # sparse vars need to be combined between pre_densification and post
+    # transformation
+    # i.e. list(colls.variables.keys()) != [x.name for x in colls.get_sparse_variables()]
     try:
         df_sparse = colls_pre_densification.to_df(include_dense=False)
     except AttributeError:

From a2fba9276c0c6d3150544a78edb78300b8b94cda Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Thu, 13 May 2021 14:00:54 +0100
Subject: [PATCH 21/37] correct indentation bug

Transformations were previously applied only when they were looked up in
the default transformations module; explicitly registered handlers were
never called.
---
 bids/modeling/transformations/base.py | 38 +++++++++++++--------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/bids/modeling/transformations/base.py b/bids/modeling/transformations/base.py
index a4d171851..024f13801 100644
--- a/bids/modeling/transformations/base.py
+++ b/bids/modeling/transformations/base.py
@@ -456,31 +456,31 @@ def transform(self, collection, transformations):
 
             # Check registered transformations; fall back on default module
             func = self.transformations.get(name, None)
-            pre_dense = {}
             if func is None:
                 if not hasattr(self.default, name):
                     raise ValueError("No transformation '%s' found: either "
                                      "explicitly register a handler, or pass a"
                                      " default module that supports it." % name)
                 func = getattr(self.default, name)
-                # check for sparse variables here and save them
-                matching_sparse_cols = []
-                if self.save_pre_dense:
-                    for variable in collection.match_variables(cols, return_type='variable'):
-                        if isinstance(variable, SparseRunVariable):
-                            matching_sparse_cols.append(variable.clone())
-
-                func(collection, cols, **kwargs)
-
-                # check here to see if those variables are still sparse
-                # if so, continue, if not, save the sparse variables prior to transformation
-                if len(matching_sparse_cols) > 0:
-                    for variable in matching_sparse_cols:
-                        name = variable.name
-                        matching_post_tfm = collection.match_variables(name, return_type='variable')
-                        assert len(matching_post_tfm) < 2
-                        if (len(matching_post_tfm) == 0) or not isinstance(matching_post_tfm[0], SparseRunVariable):
-                            changed_vars.append(variable)
+
+            # check for sparse variables here and save them
+            matching_sparse_cols = []
+            if self.save_pre_dense:
+                for variable in collection.match_variables(cols, return_type='variable'):
+                    if isinstance(variable, SparseRunVariable):
+                        matching_sparse_cols.append(variable.clone())
+
+            func(collection, cols, **kwargs)
+
+            # check here to see if those variables are still sparse
+            # if so, continue, if not, save the sparse variables prior to transformation
+            if len(matching_sparse_cols) > 0:
+                for variable in matching_sparse_cols:
+                    name = variable.name
+                    matching_post_tfm = collection.match_variables(name, return_type='variable')
+                    assert len(matching_post_tfm) < 2
+                    if (len(matching_post_tfm) == 0) or not isinstance(matching_post_tfm[0], SparseRunVariable):
+                        changed_vars.append(variable)
 
         if self.save_pre_dense:
             if len(changed_vars) > 0:

From 4a6dac085d8405035dd4152a3b17e3a295901f83 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Thu, 13 May 2021 14:12:03 +0100
Subject: [PATCH 22/37] update TODOs

---
 bids/statsmodels_design_synthesizer.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index 8eaa03ba3..9a8919def 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -54,14 +54,11 @@ def statsmodels_design_synthesizer(params):
     run = RunNode(parse_file_entities(params["events_tsv"]), None, duration, params["tr"], params["nvol"])
     coll = get_events_collection(coll_df, run, output='collection')
 
-    # perform transformations
+    # perform transformations, additionally save variables that were changed
+    # TODO: need to consider sparse to sparse
     colls, colls_pre_densification = transformations.TransformerManager(save_pre_dense=True).transform(coll, model_transforms)
 
     # Save sparse vars
-    # TODO: consider cases where dense/sparse changes from transformation but
-    # sparse vars need to be combined between pre_densification and post
-    # transformation
-    # i.e. list(colls.variables.keys()) != [x.name for x in colls.get_sparse_variables()]
     try:
         df_sparse = colls_pre_densification.to_df(include_dense=False)
     except AttributeError:

From 1094c2f02c5458360fe3f81ed9c32377a5cc3590 Mon Sep 17 00:00:00 2001
From: shotgunosine <adenosine@gmail.com>
Date: Thu, 13 May 2021 14:54:01 +0100
Subject: [PATCH 23/37] fix sparse var saving

---
 bids/modeling/transformations/base.py  |  4 ++++
 bids/statsmodels_design_synthesizer.py | 31 +++++++++++++++++++++-----
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/bids/modeling/transformations/base.py b/bids/modeling/transformations/base.py
index 024f13801..31ccc4ff2 100644
--- a/bids/modeling/transformations/base.py
+++ b/bids/modeling/transformations/base.py
@@ -464,6 +464,10 @@ def transform(self, collection, transformations):
                 func = getattr(self.default, name)
 
             # check for sparse variables here and save them
+            # We want everything sparse (the last time it was sparse during the
+            # transformation process) and everything that is dense at the end
+            # of the transformations. This will allow downstream users to add
+            # convolutions etc. as they please.
             matching_sparse_cols = []
             if self.save_pre_dense:
                 for variable in collection.match_variables(cols, return_type='variable'):
diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index 9a8919def..3b67b317e 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -54,15 +54,34 @@ def statsmodels_design_synthesizer(params):
     run = RunNode(parse_file_entities(params["events_tsv"]), None, duration, params["tr"], params["nvol"])
     coll = get_events_collection(coll_df, run, output='collection')
 
-    # perform transformations, additionally save variables that were changed
-    # TODO: need to consider sparse to sparse
-    colls, colls_pre_densification = transformations.TransformerManager(save_pre_dense=True).transform(coll, model_transforms)
+    # perform transformations, additionally save variables that were changed.
+    # If a column is transformed but not densified it will not be in
+    # colls_pre_densification.
+    colls, colls_pre_densification = (
+        transformations.TransformerManager(save_pre_dense=True)
+        .transform(coll, model_transforms)
+        )
 
     # Save sparse vars
-    try:
-        df_sparse = colls_pre_densification.to_df(include_dense=False)
-    except AttributeError:
+    if colls_pre_densification is not None:
+        final_sparse_names = set([vv.name for vv in colls.variables])
+        pre_dense_names = set([vv.name for vv in colls_pre_densifification])
+        shared_names = final_sparse_names.intersection(pre_dense_names)
+        if len(shared_names) > 0:
+            raise ValueError(
+        f"""Somehow you've ended up with a copy of {shared_names} in both the final
+        transformed variables and in the pre-densification variables. Did you delete a
+        variable and recreate one with same name?"""
+        )
+        output = merge_collections(
+            [colls_pre_densification, BidsRunVariableCollection(colls.get_sparse_variables())]
+        )
+        assert output.all_sparse()
+
+        df_sparse = output.to_df()
+    else:
         df_sparse = colls.to_df(include_dense=False)
+
     df_sparse.to_csv(output_dir / "transformed_events.tsv", index=None, sep="\t", na_rep="n/a")
     # Save dense vars
     try:

From e1a977a5ef6d627b32f37a7d7ca0c2f5f8fef4af Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Thu, 13 May 2021 17:51:43 +0100
Subject: [PATCH 24/37] more fixes for sparse/dense

---
 bids/statsmodels_design_synthesizer.py            | 8 ++++----
 bids/tests/test_statsmodels-design-synthesizer.py | 4 ++++
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index 3b67b317e..6a8739dce 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -8,7 +8,7 @@
 from collections import namedtuple
 from bids.modeling import transformations
 from bids.utils import convert_JSON
-from bids.variables import BIDSRunVariableCollection, SparseRunVariable
+from bids.variables import BIDSRunVariableCollection, SparseRunVariable, merge_collections
 from bids.layout.utils import parse_file_entities
 from bids.variables.io import get_events_collection
 from bids.variables.entities import RunNode
@@ -64,8 +64,8 @@ def statsmodels_design_synthesizer(params):
 
     # Save sparse vars
     if colls_pre_densification is not None:
-        final_sparse_names = set([vv.name for vv in colls.variables])
-        pre_dense_names = set([vv.name for vv in colls_pre_densifification])
+        final_sparse_names = set([vv for vv in colls.variables])
+        pre_dense_names = set([vv for vv in colls_pre_densification.variables])
         shared_names = final_sparse_names.intersection(pre_dense_names)
         if len(shared_names) > 0:
             raise ValueError(
@@ -74,7 +74,7 @@ def statsmodels_design_synthesizer(params):
         variable and recreate one with same name?"""
         )
         output = merge_collections(
-            [colls_pre_densification, BidsRunVariableCollection(colls.get_sparse_variables())]
+            [colls_pre_densification, BIDSRunVariableCollection(colls.get_sparse_variables())]
         )
         assert output.all_sparse()
 
diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py
index e7dd6b49f..b46318688 100644
--- a/bids/tests/test_statsmodels-design-synthesizer.py
+++ b/bids/tests/test_statsmodels-design-synthesizer.py
@@ -30,6 +30,9 @@
         "nvol": 160,
         "output_sampling_rate":10,
     }
+EXAMPLE_USER_ARGS_3 = EXAMPLE_USER_ARGS_2.copy()
+EXAMPLE_USER_ARGS_3["transforms"] = f"{DATA_DIR}/models/ds-005_type-convolution_model.json"
+
 
 def test_cli_help():
     output = sp.check_output([SYNTHESIZER, "-h"])
@@ -42,6 +45,7 @@ def test_cli_help():
     [
         ("Model type test", EXAMPLE_USER_ARGS),
         ("Model type mfx", EXAMPLE_USER_ARGS_2),
+        ("Model type convolution", EXAMPLE_USER_ARGS_3),
     ]
 )
 def test_design_aggregation_function(tmp_path,test_case,user_args):

From 1889e41abe888cf25fa708784e5e635cdd6f2e89 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Thu, 13 May 2021 17:56:54 +0100
Subject: [PATCH 25/37] add model with convolution

---
 .../models/ds-005_type-convolution_model.json | 109 ++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100644 bids/tests/data/ds005/models/ds-005_type-convolution_model.json

diff --git a/bids/tests/data/ds005/models/ds-005_type-convolution_model.json b/bids/tests/data/ds005/models/ds-005_type-convolution_model.json
new file mode 100644
index 000000000..ff1e77bcb
--- /dev/null
+++ b/bids/tests/data/ds005/models/ds-005_type-convolution_model.json
@@ -0,0 +1,109 @@
+{
+    "Name": "test_model",
+    "Description": "simple test model",
+    "Nodes": [
+        {
+            "Name": "run",
+            "Level": "Run",
+            "GroupBy": [
+                "run",
+                "subject"
+            ],
+            "Model": {
+                "X": [
+                    "RT",
+                    "gain"
+                ],
+                "Formula": "0 + RT * gain"
+            },
+            "Transformations": [
+                {
+                    "Name": "Factor",
+                    "Input": "trial_type"
+                },
+                {
+                    "Name": "Rename",
+                    "Input": "trial_type.parametric gain",
+                    "Output": "gain"
+                },
+                {
+                    "Name": "Threshold",
+                    "Input": "respcat",
+                    "Output": "pos_respcat",
+                    "Binarize": true
+                },
+                {
+                    "Name": "Scale",
+                    "Input": "RT"
+                },
+                {
+                  "Name": "Convolve",
+                  "Input": ["gain", "pos_respcat"],
+                  "Model": "spm"
+                }
+                     ],
+            "DummyContrasts": {
+                "Type": "t"
+            }
+        },
+        {
+            "Name": "participant",
+            "Level": "Subject",
+            "Model": {
+                "X": [
+                    "@intercept"
+                ]
+            },
+            "DummyContrasts": {
+                "Type": "FEMA"
+            }
+        },
+        {
+            "Name": "by-group",
+            "Level": "Dataset",
+            "Model": {
+                "X": [
+                    "@intercept"
+                ]
+            },
+            "DummyContrasts": {
+                "Type": "t"
+            }
+        },
+        {
+            "Name": "group-diff",
+            "Level": "Dataset",
+            "Model": {
+                "X": [
+                    "@intercept",
+                    "sex"
+                ]
+            },
+            "DummyContrasts": {
+                "Type": "t"
+            }
+        }
+    ],
+    "Edges": [
+        {
+            "Source": "run",
+            "Destination": "participant",
+            "GroupBy": [
+                "subject",
+                "contrast"
+            ]
+        },
+        {
+            "Source": "participant",
+            "Destination": "by-group",
+            "GroupBy": [
+                "sex"
+            ]
+        },
+        {
+            "Source": "participant",
+            "Destination": "group-diff",
+            "GroupBy": []
+        }
+    ]
+}

From a1764f198a6ce4bcecbe4853c689d9db1a631491 Mon Sep 17 00:00:00 2001
From: shotgunosine <adenosine@gmail.com>
Date: Thu, 13 May 2021 12:57:47 -0400
Subject: [PATCH 26/37] Fix sparse variable filtering

---
 bids/statsmodels_design_synthesizer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index 6a8739dce..904d4bd3c 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -64,7 +64,8 @@ def statsmodels_design_synthesizer(params):
 
     # Save sparse vars
     if colls_pre_densification is not None:
-        final_sparse_names = set([vv for vv in colls.variables])
+        final_sparse_colls = BIDSRunVariableCollection(colls.get_sparse_variables())
+        final_sparse_names = set([vv for vv in final_sparse_colls.variables])
         pre_dense_names = set([vv for vv in colls_pre_densification.variables])
         shared_names = final_sparse_names.intersection(pre_dense_names)
         if len(shared_names) > 0:
@@ -74,7 +75,7 @@ def statsmodels_design_synthesizer(params):
         variable and recreate one with same name?"""
         )
         output = merge_collections(
-            [colls_pre_densification, BIDSRunVariableCollection(colls.get_sparse_variables())]
+            [colls_pre_densification, final_sparse_colls]
         )
         assert output.all_sparse()
 

From 53696641ecf69cc6d5eec3d1381e4e34c4d806ba Mon Sep 17 00:00:00 2001
From: shotgunosine <adenosine@gmail.com>
Date: Thu, 13 May 2021 14:25:58 -0400
Subject: [PATCH 27/37] fix check columns in output dataframes

---
 bids/statsmodels_design_synthesizer.py            |  1 +
 bids/tests/test_statsmodels-design-synthesizer.py | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index 904d4bd3c..eb25320bf 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -68,6 +68,7 @@ def statsmodels_design_synthesizer(params):
         final_sparse_names = set([vv for vv in final_sparse_colls.variables])
         pre_dense_names = set([vv for vv in colls_pre_densification.variables])
         shared_names = final_sparse_names.intersection(pre_dense_names)
+
         if len(shared_names) > 0:
             raise ValueError(
         f"""Somehow you've ended up with a copy of {shared_names} in both the final
diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py
index b46318688..a930dd203 100644
--- a/bids/tests/test_statsmodels-design-synthesizer.py
+++ b/bids/tests/test_statsmodels-design-synthesizer.py
@@ -6,6 +6,7 @@
 import subprocess as sp
 from pathlib import Path
 import tempfile
+import pandas as pd
 
 SYNTHESIZER = "statsmodels-design-synthesizer"
 from bids import statsmodels_design_synthesizer as synth_mod
@@ -45,13 +46,23 @@ def test_cli_help():
     [
         ("Model type test", EXAMPLE_USER_ARGS),
         ("Model type mfx", EXAMPLE_USER_ARGS_2),
-        ("Model type convolution", EXAMPLE_USER_ARGS_3),
     ]
 )
 def test_design_aggregation_function(tmp_path,test_case,user_args):
     user_args['output_dir'] = str(tmp_path)
     synth_mod.main(user_args)
 
+def test_design_aggregation_function_with_convolution(tmp_path):
+    EXAMPLE_USER_ARGS_3['output_dir'] = str(tmp_path)
+    synth_mod.main(EXAMPLE_USER_ARGS_3)
+    sparse_output = pd.read_csv(tmp_path/"transformed_events.tsv", sep='\t')
+    assert 'pos_respcat' in sparse_output.columns
+    assert 'gain' in sparse_output.columns
+
+    dense_output = pd.read_csv(tmp_path/"transformed_time_series.tsv", sep='\t')
+    assert 'pos_respcat' in dense_output.columns
+    assert 'gain' in dense_output.columns
+
 @pytest.mark.parametrize(
     "test_case,user_args",
     [

From 34a209fa3f9c0dbaf3486a5d542394d084303886 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Thu, 13 May 2021 20:12:01 +0100
Subject: [PATCH 28/37] use click for cli

---
 bids/statsmodels_design_synthesizer.py        | 103 +++++++++---------
 .../test_statsmodels-design-synthesizer.py    |   9 +-
 2 files changed, 54 insertions(+), 58 deletions(-)

diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index eb25320bf..bf138d34d 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -12,14 +12,52 @@
 from bids.layout.utils import parse_file_entities
 from bids.variables.io import get_events_collection
 from bids.variables.entities import RunNode
+import click
 
+from . import __version__
 
-def statsmodels_design_synthesizer(params):
-    """Console script for bids statsmodels_design_synthesizer."""
 
-    # Sampling rate of output
-    sampling_rate_out = params.get("output_sampling_rate")
-    output_dir = Path(params.get("output_dir", 'design_synthesizer'))
+@click.command()
+@click.version_option(__version__, prog_name='statsmodels_design_sythesizer')
+@click.option(
+    "--events-tsv", required=True, help="Path to events TSV")
+@click.option(
+        "--transforms", required=True, help="Path to transform or model json"
+    )
+@click.option(
+        "--nvol", required=True, type=int, help="Number of volumes in func time-series"
+    )
+@click.option(
+        "--tr", required=True, type=float, help="TR for func time series"
+    )
+@click.option(
+    "--ta", required=True, type=float, help="TA for events")
+@click.option(
+        "--output-sampling-rate",
+        required=False,
+        type=float,
+        help="Output sampling rate in Hz when a full design matrix is desired.",
+    )
+@click.option(
+        "--output-dir",
+        required=False,
+        help="Path to directory to write processed event files.",
+    )
+def main(**kwargs):
+    statsmodels_design_synthesizer(**kwargs)
+
+def  statsmodels_design_synthesizer(
+    *,
+    events_tsv,
+    transforms,
+    nvol,
+    tr,
+    ta,
+    output_sampling_rate=None,
+    output_dir=None,
+ ):
+
+    output_dir = Path(output_dir  or "design_synthesizer")
     output_dir.mkdir(exist_ok=True) 
 
     # Process transformations file
@@ -29,7 +67,7 @@ def statsmodels_design_synthesizer(params):
     # transformations has been obtained. This will most likely be the case since
     # transformations at higher levels will no longer be required when the new
     # "flow" approach is used.
-    transforms_file = Path(params["transforms"])
+    transforms_file = Path(transforms)
     if not transforms_file.exists():
         raise ValueError(f"Cannot find {transforms_file}")
     model = convert_JSON(json.loads(transforms_file.read_text()))
@@ -42,16 +80,16 @@ def statsmodels_design_synthesizer(params):
         raise ValueError("Cannot find a key for nodes in the model file")
     model_transforms = model[nodes_key][0]["transformations"]
 
-    duration = params["nvol"] * params["tr"]
+    duration = nvol * tr
 
     # Get relevant collection
-    coll_df = pd.read_csv(params["events_tsv"], delimiter="\t")
+    coll_df = pd.read_csv(events_tsv, delimiter="\t")
     RunInfo = namedtuple("RunInfo", ["entities", "duration"])
 
     #run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration)
     # TODO: this will need to be implemented without RunNode to break cyclic
     # dependencies if transformations is to be extracted
-    run = RunNode(parse_file_entities(params["events_tsv"]), None, duration, params["tr"], params["nvol"])
+    run = RunNode(parse_file_entities(events_tsv), None, duration, tr, nvol)
     coll = get_events_collection(coll_df, run, output='collection')
 
     # perform transformations, additionally save variables that were changed.
@@ -93,53 +131,10 @@ def statsmodels_design_synthesizer(params):
         pass
 
     # Save full design_matrix
-    if sampling_rate_out:
-        df_full = colls.to_df(sampling_rate=sampling_rate_out)
+    if output_sampling_rate:
+        df_full = colls.to_df(sampling_rate=output_sampling_rate)
         df_full.to_csv(output_dir / "aggregated_design.tsv", index=None, sep="\t", na_rep="n/a")
 
-def create_parser():
-    """Returns argument parser"""
-    p = argparse.ArgumentParser()
-    p.add_argument("--events-tsv", required=True, help="Path to events TSV")
-    p.add_argument(
-        "--transforms", required=True, help="Path to transform or model json"
-    )
-    p.add_argument(
-        "--output-sampling-rate",
-        required=False,
-        type=float,
-        help="Output sampling rate in Hz when a full design matrix is desired.",
-    )
-
-    p.add_argument(
-        "--output-dir",
-        required=True,
-        help="Path to directory to write processed event files.",
-    )
-
-    ptimes = p.add_argument_group(
-        "Specify some essential details about the time series."
-    )
-    ptimes.add_argument(
-        "--nvol", required=True, type=int, help="Number of volumes in func time-series"
-    )
-    ptimes.add_argument(
-        "--tr", required=True, type=float, help="TR for func time series"
-    )
-    ptimes.add_argument("--ta", required=True, type=float, help="TA for events")
-
-    return p
-
-
-def main(user_args=None):
-    parser = create_parser()
-    if user_args is None:
-        namespace = parser.parse_args(sys.argv[1:])
-        params = vars(namespace)
-    else:
-        params = user_args
-
-    statsmodels_design_synthesizer(params)
 
 
 if __name__ == "__main__":
diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py
index a930dd203..6ba1cd7e6 100644
--- a/bids/tests/test_statsmodels-design-synthesizer.py
+++ b/bids/tests/test_statsmodels-design-synthesizer.py
@@ -17,8 +17,8 @@
 # Define some example user arg combinations (without output_dir which is better
 # to define in the scope of the test)
 EXAMPLE_USER_ARGS = {
-        "transforms": f"{DATA_DIR}/models/ds-005_type-mfx_model.json",
         "events_tsv": f"{DATA_DIR}/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv",
+        "transforms": f"{DATA_DIR}/models/ds-005_type-mfx_model.json",
         "tr": 2,
         "ta": 2,
         "nvol": 160,
@@ -36,7 +36,7 @@
 
 
 def test_cli_help():
-    output = sp.check_output([SYNTHESIZER, "-h"])
+    output = sp.check_output([SYNTHESIZER, "--help"])
     with pytest.raises(sp.CalledProcessError):
         output = sp.check_output([SYNTHESIZER, "--non-existent"])
 
@@ -50,11 +50,12 @@ def test_cli_help():
 )
 def test_design_aggregation_function(tmp_path,test_case,user_args):
     user_args['output_dir'] = str(tmp_path)
-    synth_mod.main(user_args)
+    main_func = getattr(synth_mod, SYNTHESIZER.replace("-","_"))
+    main_func(**user_args)
 
 def test_design_aggregation_function_with_convolution(tmp_path):
     EXAMPLE_USER_ARGS_3['output_dir'] = str(tmp_path)
-    synth_mod.main(EXAMPLE_USER_ARGS_3)
+    synth_mod.statsmodels_design_synthesizer(**EXAMPLE_USER_ARGS_3)
     sparse_output = pd.read_csv(tmp_path/"transformed_events.tsv", sep='\t')
     assert 'pos_respcat' in sparse_output.columns
     assert 'gain' in sparse_output.columns

From 3c57020b4b2110ffb012907345160589ccdc52b4 Mon Sep 17 00:00:00 2001
From: shotgunosine <adenosine@gmail.com>
Date: Thu, 13 May 2021 16:15:04 -0400
Subject: [PATCH 29/37] enh don't rely on run node for get events collection

---
 bids/statsmodels_design_synthesizer.py |  8 ++++----
 bids/variables/io.py                   | 26 ++++++++------------------
 2 files changed, 12 insertions(+), 22 deletions(-)

diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index bf138d34d..22ae3c47a 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -83,14 +83,14 @@ def  statsmodels_design_synthesizer(
     duration = nvol * tr
 
     # Get relevant collection
-    coll_df = pd.read_csv(events_tsv, delimiter="\t")
-    RunInfo = namedtuple("RunInfo", ["entities", "duration"])
+    coll_df = pd.read_csv(params["events_tsv"], delimiter="\t")
+    RunInfo = namedtuple('RunInfo', ['entities', 'duration', 'tr', 'image', 'n_vols'])
 
     #run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration)
     # TODO: this will need to be implemented without RunNode to break cyclic
     # dependencies if transformations is to be extracted
-    run = RunNode(parse_file_entities(events_tsv), None, duration, tr, nvol)
-    coll = get_events_collection(coll_df, run, output='collection')
+    run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration, params["tr"], None, params["nvol"])
+    coll = BIDSRunVariableCollection(get_events_collection(coll_df, run_info))
 
     # perform transformations, additionally save variables that were changed.
     # If a column is transformed but not densified it will not be in
diff --git a/bids/variables/io.py b/bids/variables/io.py
index d69af8d2a..14d6b821c 100644
--- a/bids/variables/io.py
+++ b/bids/variables/io.py
@@ -120,7 +120,7 @@ def _get_nvols(img_f):
 
     return nvols
 
-def get_events_collection(_data, run, drop_na=True, columns=None, entities=None, output='run'):
+def get_events_collection(_data, run_info, drop_na=True, columns=None, entities=None, output='run'):
     """
     This is an attempt to minimally implement:
     https://github.com/bids-standard/pybids/blob/statsmodels/bids/variables/io.py
@@ -128,12 +128,7 @@ def get_events_collection(_data, run, drop_na=True, columns=None, entities=None,
     in a way that will still work for bids io, but will also work without layout.
     """
 
-    if output == 'collection':
-        colls_output = []
-    elif output != 'run':
-        raise ValueError(f"output must be one of [run, output], {output} was passed.")
-
-    run_info = run.get_info()
+    run_info
     if entities is None:
         entities = run_info.entities
     if 'amplitude' in _data.columns:
@@ -154,7 +149,7 @@ def get_events_collection(_data, run, drop_na=True, columns=None, entities=None,
 
     _cols = columns or list(set(_data.columns.tolist()) -
                             {'onset', 'duration'})
-
+    colls_output = []
     # Construct a DataFrame for each extra column
     for col in _cols:
         df = _data[['onset', 'duration']].copy()
@@ -174,15 +169,8 @@ def get_events_collection(_data, run, drop_na=True, columns=None, entities=None,
 
         var = SparseRunVariable(
             name=col, data=df, run_info=run_info, source='events')
-        if output == 'run':
-            run.add_variable(var)
-        else:
-            colls_output.append(var)
-    if output == 'run':
-        return run
-    else:
-        return BIDSRunVariableCollection(colls_output)
-
+        colls_output.append(var)
+    return colls_output
 
 def get_regressors_collection(_data, run, columns=None, entities=None, output='run'):
     if output == 'collection':
@@ -419,7 +407,9 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
                 full_search=True, ignore_strict_entities=['suffix', 'extension'])
             for ef in efiles:
                 _data = pd.read_csv(ef, sep='\t')
-                run = get_events_collection(_data, run, drop_na=drop_na, columns=columns)
+                event_cols = get_events_collection(_data, run.get_info(), drop_na=drop_na, columns=columns)
+                for ec in event_cols:
+                    run.add_variable(ec)
 
         # Process confound files
         if regressors:

From 979ec1051c1c8674d9482ea8fafb5d74c51acedb Mon Sep 17 00:00:00 2001
From: shotgunosine <adenosine@gmail.com>
Date: Thu, 13 May 2021 16:37:02 -0400
Subject: [PATCH 30/37] enh remove run node from rec and reg loading

---
 bids/variables/io.py | 71 +++++++++++++-------------------------------
 1 file changed, 20 insertions(+), 51 deletions(-)

diff --git a/bids/variables/io.py b/bids/variables/io.py
index 14d6b821c..4886e9d07 100644
--- a/bids/variables/io.py
+++ b/bids/variables/io.py
@@ -120,7 +120,7 @@ def _get_nvols(img_f):
 
     return nvols
 
-def get_events_collection(_data, run_info, drop_na=True, columns=None, entities=None, output='run'):
+def get_events_collection(_data, run_info, drop_na=True, columns=None, entities=None):
     """
     This is an attempt to minimally implement:
     https://github.com/bids-standard/pybids/blob/statsmodels/bids/variables/io.py
@@ -172,13 +172,10 @@ def get_events_collection(_data, run_info, drop_na=True, columns=None, entities=
         colls_output.append(var)
     return colls_output
 
-def get_regressors_collection(_data, run, columns=None, entities=None, output='run'):
-    if output == 'collection':
-        colls_output = []
-    elif output != 'run':
-        raise ValueError(f"output must be one of [run, output], {output} was passed.")
 
-    run_info = run.get_info()
+def get_regressors_collection(_data, run_info, columns=None, entities=None):
+    
+    colls_output = []
     if entities is None:
         entities = run_info.entities
 
@@ -186,38 +183,17 @@ def get_regressors_collection(_data, run, columns=None, entities=None, output='r
         conf_cols = list(set(_data.columns) & set(columns))
         _data = _data.loc[:, conf_cols]
     for col in _data.columns:
-        sr = 1. / run.repetition_time
+        sr = 1. / run_info.tr
         var = DenseRunVariable(name=col, values=_data[[col]],
                        run_info=run_info, source='regressors',
                        sampling_rate=sr)
+        colls_output.append(var)
+    return colls_output
 
-        # TODO: this logic can be simplified. Can always append to a list and
-        # then add to the output object.
-        if output == 'run':
-            run.add_variable(var)
-        else:
-            colls_output.append(var)
-    if output == 'run':
-        return run
-    else:
-        return BIDSRunVariableCollection(colls_output)
-
-
-def get_rec_collection(data,run,metadata,source,run_info=None,columns=None,entities=None, output='run'):
-
-    if output == 'collection':
-        colls_output = []
-    elif output != 'run':
-        raise ValueError(f"output must be one of [run, output], {output} was passed.")
-
-    if output == 'collection':
-        colls_output = []
-    elif output != 'run':
-        raise ValueError(f"output must be one of [run, output], {output} was passed.")
 
-    if not run_info:
-        run_info = run.get_info()
+def get_rec_collection(data,run_info,metadata,source,columns=None,entities=None):
 
+    colls_output = []
     freq = metadata['SamplingFrequency']
     st = metadata['StartTime']
     rf_cols = metadata['Columns']
@@ -231,7 +207,7 @@ def get_rec_collection(data,run,metadata,source,run_info=None,columns=None,entit
     n_cols = len(rf_cols)
     if not n_cols:
         # nothing to do
-        return run
+        return []
 
     # Keep only in-scan samples
     if st < 0:
@@ -245,7 +221,7 @@ def get_rec_collection(data,run,metadata,source,run_info=None,columns=None,entit
         pad = np.zeros((n_pad, n_cols))
         values = np.r_[pad, values]
 
-    n_rows = int(run.duration * freq)
+    n_rows = int(run_info.duration * freq)
     if len(values) > n_rows:
         values = values[:n_rows, :]
     elif len(values) < n_rows:
@@ -256,17 +232,8 @@ def get_rec_collection(data,run,metadata,source,run_info=None,columns=None,entit
     for col in df.columns:
         var = DenseRunVariable(name=col, values=df[[col]], run_info=run_info,
                                source=source, sampling_rate=freq)
-         # TODO: this logic can be simplified. Can always append to a list and
-        # then add to the output object.
-        if output == 'run':
-            run.add_variable(var)
-        else:
-            colls_output.append(var)
-    if output == 'run':
-        return run
-    else:
-        return BIDSRunVariableCollection(colls_output)
-
+        colls_output.append(var)
+    return colls_output
 
 
 def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
@@ -419,8 +386,9 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
                                         **sub_ents)
             for cf in confound_files:
                 _data = pd.read_csv(cf.path, sep='\t', na_values='n/a')
-                run = get_regressors_collection(_data, run, columns=columns)
-
+                reg_colls = get_regressors_collection(_data, run.get_info(), columns=columns)
+                for rc in reg_colls:
+                    run.add_variable(rc)
 
         # Process recordinging files
         rec_types = []
@@ -440,13 +408,14 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
                 # rec_file passed in for now because rec_type needs to be inferred
                 source = 'physio' if '_physio.tsv' in rf else 'stim'
                 data = pd.read_csv(rf, sep='\t')
-                run = get_rec_collection(
+                rec_colls = get_rec_collection(
                                          data,
-                                         run,
+                                         run.get_info(),
                                          metadata,
                                          source,
-                                         run_info=run_info,
                                          columns=columns)
+                for rc in rec_colls:
+                    run.add_variable(rc)
 
     return dataset
 

From 69c3720569621f25f998cf8d851439dbfcb0f840 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Fri, 14 May 2021 10:21:19 +0100
Subject: [PATCH 31/37] remove params, kwargs no longer captured in params

---
 bids/statsmodels_design_synthesizer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index 22ae3c47a..f89d69f01 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -83,13 +83,13 @@ def  statsmodels_design_synthesizer(
     duration = nvol * tr
 
     # Get relevant collection
-    coll_df = pd.read_csv(params["events_tsv"], delimiter="\t")
+    coll_df = pd.read_csv(events_tsv, delimiter="\t")
     RunInfo = namedtuple('RunInfo', ['entities', 'duration', 'tr', 'image', 'n_vols'])
 
-    #run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration)
+    #run_info = RunInfo(parse_file_entities(events_tsv), duration)
     # TODO: this will need to be implemented without RunNode to break cyclic
     # dependencies if transformations is to be extracted
-    run_info = RunInfo(parse_file_entities(params["events_tsv"]), duration, params["tr"], None, params["nvol"])
+    run_info = RunInfo(parse_file_entities(events_tsv), duration, tr, None, nvol)
     coll = BIDSRunVariableCollection(get_events_collection(coll_df, run_info))
 
     # perform transformations, additionally save variables that were changed.

From 02cd6fc9efd0828a79eed2bcecca75c121b4a242 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Fri, 14 May 2021 10:19:39 +0100
Subject: [PATCH 32/37] add transforms reading function

---
 bids/statsmodels_design_synthesizer.py | 25 ++-------------
 bids/variables/io.py                   | 42 +++++++++++++++++++++++++-
 2 files changed, 44 insertions(+), 23 deletions(-)

diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
index f89d69f01..af6dffe12 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/statsmodels_design_synthesizer.py
@@ -10,7 +10,7 @@
 from bids.utils import convert_JSON
 from bids.variables import BIDSRunVariableCollection, SparseRunVariable, merge_collections
 from bids.layout.utils import parse_file_entities
-from bids.variables.io import get_events_collection
+from bids.variables.io import get_events_collection, parse_transforms
 from bids.variables.entities import RunNode
 import click
 
@@ -46,6 +46,7 @@
 def main(**kwargs):
     statsmodels_design_synthesizer(**kwargs)
 
+
 def  statsmodels_design_synthesizer(
     *,
     events_tsv,
@@ -59,27 +60,7 @@ def  statsmodels_design_synthesizer(
 
     output_dir = Path(output_dir  or "design_synthesizer")
     output_dir.mkdir(exist_ok=True) 
-
-    # Process transformations file
-    # TODO: abstact transforms file reading into a function.
-    # TODO: add transforms functionality, for now only model.json is handled
-    # TODO: some basic error checking to confirm the correct level of
-    # transformations has been obtained. This will most likely be the case since
-    # transformations at higher levels will no longer be required when the new
-    # "flow" approach is used.
-    transforms_file = Path(transforms)
-    if not transforms_file.exists():
-        raise ValueError(f"Cannot find {transforms_file}")
-    model = convert_JSON(json.loads(transforms_file.read_text()))
-
-    if "nodes" in model:
-        nodes_key = "nodes"
-    elif "steps" in model:
-        nodes_key = "steps"
-    else:
-        raise ValueError("Cannot find a key for nodes in the model file")
-    model_transforms = model[nodes_key][0]["transformations"]
-
+    model_transforms = parse_transforms(transforms)
     duration = nvol * tr
 
     # Get relevant collection
diff --git a/bids/variables/io.py b/bids/variables/io.py
index 4886e9d07..0a382d8d0 100644
--- a/bids/variables/io.py
+++ b/bids/variables/io.py
@@ -1,13 +1,14 @@
 """ Tools for reading/writing BIDS data files. """
 
 from os.path import join
+from pathlib import Path
 import warnings
 import json
 
 import numpy as np
 import pandas as pd
 
-from bids.utils import listify
+from bids.utils import listify, convert_JSON
 from .entities import NodeIndex
 from .variables import SparseRunVariable, DenseRunVariable, SimpleVariable
 from .collections import BIDSRunVariableCollection
@@ -554,3 +555,42 @@ def make_patt(x, regex_search=False):
             node.add_variable(SimpleVariable(name=col_name, data=df, source=suffix))
 
     return dataset
+
+
+def parse_transforms(transforms_in, validate=True,level="run"):
+    """ Adapted from bids.modeling.statsmodels.BIDSStatsModelsGraph. Also
+    handles files/jsons that only define the transformations section of the
+    model.json """
+
+    # input is JSON as a file or dict
+    if isinstance(transforms_in, str):
+        if not Path(transforms_in).exists():
+            raise ValueError(f"Cannot find path: {transforms_in}")
+        with open(transforms_in, 'r', encoding='utf-8') as fobj:
+            transforms_raw = json.load(fobj)
+    else:
+        transforms_raw = transforms_in
+
+    # Convert JSON from CamelCase to snake_case keys
+    transforms_raw = convert_JSON(transforms_raw)
+
+    if validate:
+       # TODO
+       # validate_transforms(transforms_raw)
+       pass
+
+    # Process transformations
+    # TODO: some basic error checking to confirm the correct level of
+    # transformations has been obtained. This will most likely be the case since
+    # transformations at higher levels will no longer be required when the new
+    # "flow" approach is used.
+    if "nodes" in transforms_raw:
+        nodes_key = "nodes"
+    elif "steps" in transforms_raw:
+        nodes_key = "steps"
+    else:
+        raise ValueError("Cannot find a key for nodes in the json input representing the model")
+    transforms = transforms_raw[nodes_key][0]["transformations"]
+    return transforms
+
+

From a9ae6239a36cab5dfc5cc02f79b3792122dfe55e Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Fri, 14 May 2021 15:53:27 +0100
Subject: [PATCH 33/37] add additional support for transformation parsing

Add tests and add support for:
Model jsons with transformations
Transformation jsons
Python in memory representations of the above
---
 bids/__init__.py                |  1 -
 bids/variables/io.py            | 10 +++++-----
 bids/variables/tests/test_io.py | 35 +++++++++++++++++++++++++++++++++
 3 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/bids/__init__.py b/bids/__init__.py
index 3d38ffc26..fb96d2d4b 100644
--- a/bids/__init__.py
+++ b/bids/__init__.py
@@ -14,7 +14,6 @@
     "reports",
     "utils",
     "variables",
-    "statsmodels_design_synthesizer",
 ]
 
 due.cite(Doi("10.1038/sdata.2016.44"),
diff --git a/bids/variables/io.py b/bids/variables/io.py
index 0a382d8d0..76842583d 100644
--- a/bids/variables/io.py
+++ b/bids/variables/io.py
@@ -584,13 +584,13 @@ def parse_transforms(transforms_in, validate=True,level="run"):
     # transformations has been obtained. This will most likely be the case since
     # transformations at higher levels will no longer be required when the new
     # "flow" approach is used.
-    if "nodes" in transforms_raw:
-        nodes_key = "nodes"
-    elif "steps" in transforms_raw:
-        nodes_key = "steps"
+    if "transformations" in transforms_raw:
+        transforms = transforms_raw["transformations"]
+    elif any(k in transforms_raw for k in ["nodes","steps"]):
+        nodes_key = "nodes" if "nodes" in transforms_raw else "steps"
+        transforms = transforms_raw[nodes_key][0]["transformations"]
     else:
         raise ValueError("Cannot find a key for nodes in the json input representing the model")
-    transforms = transforms_raw[nodes_key][0]["transformations"]
     return transforms
 
 
diff --git a/bids/variables/tests/test_io.py b/bids/variables/tests/test_io.py
index c4690ce84..ce2df52a1 100644
--- a/bids/variables/tests/test_io.py
+++ b/bids/variables/tests/test_io.py
@@ -2,12 +2,21 @@
 from bids.variables import (SparseRunVariable, SimpleVariable,
                             DenseRunVariable, load_variables)
 from bids.variables.entities import Node, RunNode, NodeIndex
+from bids.variables.io import parse_transforms
 from unittest.mock import patch
 import pytest
 from os.path import join
+from pathlib import Path
+import tempfile
+import json
 from bids.tests import get_test_data_path
 from bids.config import set_option, get_option
 
+EXAMPLE_TRANSFORM = {"Transformations": [
+    {"Name": "example_trans", "Inputs": ["col_a", "col_b"]}]}
+# tempfile.tempdir stays None until gettempdir() has run, so call it directly.
+TRANSFORMS_JSON = join(tempfile.gettempdir(),"tranformations.json")
+Path(TRANSFORMS_JSON).write_text(json.dumps(EXAMPLE_TRANSFORM))
 
 @pytest.fixture
 def layout1():
@@ -103,3 +112,29 @@ def test_load_synthetic_dataset(synthetic):
     subs = index.get_nodes('subject')
     assert len(subs) == 5
     assert set(subs[0].variables.keys()) == {'systolic_blood_pressure'}
+
+@pytest.mark.parametrize(
+    "test_case,transform_input,expected_names",
+    [
+        ("raw transform json",
+         EXAMPLE_TRANSFORM,
+         ["example_trans"]
+        ),
+        ("transform json file",
+         TRANSFORMS_JSON,
+         ["example_trans"]
+        ),
+        ("raw model json",
+         {"Nodes": [EXAMPLE_TRANSFORM]},
+         ["example_trans"]
+        ),
+         ("model json file",
+         str(Path(get_test_data_path()) / "ds005/models/ds-005_type-mfx_model.json"),
+         ["Scale"]
+        ),
+    ]
+)
+def test_parse_transforms(test_case,transform_input,expected_names):
+    result = parse_transforms(transform_input)
+    transformation_names =  [x['name'] for x in result]
+    assert expected_names == transformation_names

From 857c5e7d349af0d3cec3415cdaf9b0644ff72190 Mon Sep 17 00:00:00 2001
From: john lee <leej3@quansight.com>
Date: Sat, 29 May 2021 11:39:45 +0100
Subject: [PATCH 34/37] Apply suggestions from code review

Correct management of the intercept in the model file containing convolution.
Use the "Test" key (not "Type") to request a t-test in DummyContrasts.

Co-authored-by: Chris Markiewicz <markiewicz@stanford.edu>
---
 .../data/ds005/models/ds-005_type-convolution_model.json    | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/bids/tests/data/ds005/models/ds-005_type-convolution_model.json b/bids/tests/data/ds005/models/ds-005_type-convolution_model.json
index ff1e77bcb..404123259 100644
--- a/bids/tests/data/ds005/models/ds-005_type-convolution_model.json
+++ b/bids/tests/data/ds005/models/ds-005_type-convolution_model.json
@@ -14,7 +14,7 @@
                     "RT",
                     "gain"
                 ],
-                "Formula": "0 + RT * gain"
+                "Formula": "1 + RT * gain"
             },
             "Transformations": [
                 {
@@ -43,7 +43,7 @@
                 }
                      ],
             "DummyContrasts": {
-                "Type": "t"
+                "Test": "t"
             }
         },
         {
@@ -51,7 +51,7 @@
             "Level": "Subject",
             "Model": {
                 "X": [
-                    "@intercept"
+                    1
                 ]
             },
             "DummyContrasts": {

From 634481650d04a8d8fe9719ce712c4c5ce3f9373d Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Sat, 29 May 2021 12:43:27 +0100
Subject: [PATCH 35/37] rename and move to cli

Ideally morphing-time should be a subcommand.
Moving morphing-time cli test to test_cli requires a little thinking.
---
 bids/cli.py                                   | 32 +++++++++++++++
 ...design_synthesizer.py => morphing_time.py} | 39 +------------------
 ...n-synthesizer.py => test_morphing_time.py} |  9 ++---
 setup.cfg                                     |  2 +-
 4 files changed, 37 insertions(+), 45 deletions(-)
 rename bids/{statsmodels_design_synthesizer.py => morphing_time.py} (74%)
 rename bids/tests/{test_statsmodels-design-synthesizer.py => test_morphing_time.py} (90%)

diff --git a/bids/cli.py b/bids/cli.py
index 4cdc86872..b3fb0d503 100644
--- a/bids/cli.py
+++ b/bids/cli.py
@@ -4,6 +4,7 @@
 from . import __version__
 from .layout import BIDSLayoutIndexer, BIDSLayout
 from .utils import validate_multiple as _validate_multiple
+from . import morphing_time as mt
 
 # alias -h to trigger help message
 CONTEXT_SETTINGS = {'help_option_names': ['-h', '--help']}
@@ -27,6 +28,37 @@ def cli():
     pass
 
 
+@click.command()
+@click.version_option(__version__, prog_name='morphing_time')
+@click.option(
+    "--events-tsv", required=True, help="Path to events TSV")
+@click.option(
+        "--transforms", required=True, help="Path to transform or model json"
+    )
+@click.option(
+        "--nvol", required=True, type=int, help="Number of volumes in func time-series"
+    )
+@click.option(
+        "--tr", required=True, type=float, help="TR for func time series"
+    )
+@click.option(
+    "--ta", required=True, type=float, help="TA for events")
+@click.option(
+        "--output-sampling-rate",
+        required=False,
+        type=float,
+        help="Output sampling rate in Hz when a full design matrix is desired.",
+    )
+@click.option(
+        "--output-dir",
+        required=False,
+        help="Path to directory to write processed event files.",
+    )
+def morphing_time(**kwargs):
+    mt.morphing_time(**kwargs)
+
+
+
 @cli.command(context_settings=CONTEXT_SETTINGS)
 @click.argument('root', type=click.Path(file_okay=False, exists=True))
 @click.argument('db-path', type=click.Path(file_okay=False, resolve_path=True, exists=True))
diff --git a/bids/statsmodels_design_synthesizer.py b/bids/morphing_time.py
similarity index 74%
rename from bids/statsmodels_design_synthesizer.py
rename to bids/morphing_time.py
index af6dffe12..42fd1da97 100755
--- a/bids/statsmodels_design_synthesizer.py
+++ b/bids/morphing_time.py
@@ -12,42 +12,9 @@
 from bids.layout.utils import parse_file_entities
 from bids.variables.io import get_events_collection, parse_transforms
 from bids.variables.entities import RunNode
-import click
 
-from . import __version__
 
-
-@click.command()
-@click.version_option(__version__, prog_name='statsmodels_design_sythesizer')
-@click.option(
-    "--events-tsv", required=True, help="Path to events TSV")
-@click.option(
-        "--transforms", required=True, help="Path to transform or model json"
-    )
-@click.option(
-        "--nvol", required=True, type=int, help="Number of volumes in func time-series"
-    )
-@click.option(
-        "--tr", required=True, type=float, help="TR for func time series"
-    )
-@click.option(
-    "--ta", required=True, type=float, help="TA for events")
-@click.option(
-        "--output-sampling-rate",
-        required=False,
-        type=float,
-        help="Output sampling rate in Hz when a full design matrix is desired.",
-    )
-@click.option(
-        "--output-dir",
-        required=False,
-        help="Path to directory to write processed event files.",
-    )
-def main(**kwargs):
-    statsmodels_design_synthesizer(**kwargs)
-
-
-def  statsmodels_design_synthesizer(
+def  morphing_time(
     *,
     events_tsv,
     transforms,
@@ -116,7 +83,3 @@ def  statsmodels_design_synthesizer(
         df_full = colls.to_df(sampling_rate=output_sampling_rate)
         df_full.to_csv(output_dir / "aggregated_design.tsv", index=None, sep="\t", na_rep="n/a")
 
-
-
-if __name__ == "__main__":
-    sys.exit(main())  # pragma: no cover""Main module."""
diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_morphing_time.py
similarity index 90%
rename from bids/tests/test_statsmodels-design-synthesizer.py
rename to bids/tests/test_morphing_time.py
index 6ba1cd7e6..600e01ca5 100644
--- a/bids/tests/test_statsmodels-design-synthesizer.py
+++ b/bids/tests/test_morphing_time.py
@@ -1,17 +1,14 @@
 #!/usr/bin/env python
 
-"""Tests for `bids_statsmodels_design_synthesizer` package."""
-
 import pytest
 import subprocess as sp
 from pathlib import Path
 import tempfile
 import pandas as pd
 
-SYNTHESIZER = "statsmodels-design-synthesizer"
-from bids import statsmodels_design_synthesizer as synth_mod
+SYNTHESIZER = "morphing-time"
+from bids import morphing_time as synth_mod
 
-# from bids_statsmodels_design_synthesizer import Path(SYNTHESIZER).stem as synth_mod
 DATA_DIR = (Path(__file__).parent / "data/ds005").absolute()
 
 # Define some example user arg combinations (without output_dir which is better
@@ -55,7 +52,7 @@ def test_design_aggregation_function(tmp_path,test_case,user_args):
 
 def test_design_aggregation_function_with_convolution(tmp_path):
     EXAMPLE_USER_ARGS_3['output_dir'] = str(tmp_path)
-    synth_mod.statsmodels_design_synthesizer(**EXAMPLE_USER_ARGS_3)
+    synth_mod.morphing_time(**EXAMPLE_USER_ARGS_3)
     sparse_output = pd.read_csv(tmp_path/"transformed_events.tsv", sep='\t')
     assert 'pos_respcat' in sparse_output.columns
     assert 'gain' in sparse_output.columns
diff --git a/setup.cfg b/setup.cfg
index adfc9e72a..a707d4baa 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -64,7 +64,7 @@ dev =
 [options.entry_points]
 console_scripts =
     pybids=bids.cli:cli
-    statsmodels-design-synthesizer=bids.statsmodels_design_synthesizer:main
+    morphing-time=bids.cli:morphing_time
 
 [versioneer]
 VCS = git

From bb47b4cec2ec7d9e923676412e9d2a48da747a1b Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Sat, 29 May 2021 12:47:44 +0100
Subject: [PATCH 36/37] make ta default to tr

---
 bids/cli.py           | 2 +-
 bids/morphing_time.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/bids/cli.py b/bids/cli.py
index b3fb0d503..4e1ba1248 100644
--- a/bids/cli.py
+++ b/bids/cli.py
@@ -42,7 +42,7 @@ def cli():
         "--tr", required=True, type=float, help="TR for func time series"
     )
 @click.option(
-    "--ta", required=True, type=float, help="TA for events")
+    "--ta", required=False, type=float, help="TA for events")
 @click.option(
         "--output-sampling-rate",
         required=False,
diff --git a/bids/morphing_time.py b/bids/morphing_time.py
index 42fd1da97..ecfdab133 100755
--- a/bids/morphing_time.py
+++ b/bids/morphing_time.py
@@ -20,7 +20,7 @@ def  morphing_time(
     transforms,
     nvol,
     tr,
-    ta,
+    ta=None,
     output_sampling_rate=None,
     output_dir=None,
  ):
@@ -29,6 +29,7 @@ def  morphing_time(
     output_dir.mkdir(exist_ok=True) 
     model_transforms = parse_transforms(transforms)
     duration = nvol * tr
+    ta = ta or tr
 
     # Get relevant collection
     coll_df = pd.read_csv(events_tsv, delimiter="\t")

From 14391a93e20a82168e6528f57c2b17f056eef0dc Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Sat, 29 May 2021 13:36:49 +0100
Subject: [PATCH 37/37] improve parsing of transforms_in

---
 bids/variables/io.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/bids/variables/io.py b/bids/variables/io.py
index 882e37e2e..2692381db 100644
--- a/bids/variables/io.py
+++ b/bids/variables/io.py
@@ -562,12 +562,21 @@ def parse_transforms(transforms_in, validate=True,level="run"):
     handles files/jsons that only define the transformations section of the
     model.json """
 
-    # input is JSON as a file or dict
+    # input is JSON as string, dict, or path
     if isinstance(transforms_in, str):
-        if not Path(transforms_in).exists():
-            raise ValueError(f"Cannot find path: {transforms_in}")
-        with open(transforms_in, 'r', encoding='utf-8') as fobj:
-            transforms_raw = json.load(fobj)
+        # read as file if file
+        if Path(transforms_in).exists():
+            transforms_in = Path(transforms_in).read_text()
+        # convert json as string to dict
+        try:
+            transforms_raw = json.loads(transforms_in)
+        except json.JSONDecodeError as err:
+            # JSONDecodeError requires (msg, doc, pos); passing only a
+            # message raises TypeError and hides the real parse error.
+            raise json.JSONDecodeError(
+                f"{transforms_in}\nThe above input could not be parsed as "
+                f"valid json...\n{err.msg}", err.doc, err.pos
+            ) from err
     else:
         transforms_raw = transforms_in