diff --git a/.gitignore b/.gitignore index 8ff6ce74..1f92f3c7 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,7 @@ dist/ .cache/ htmlcov .pytest_cache/* - +dev/ # Logs and databases # ###################### *.log diff --git a/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_001.png b/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_001.png index 529e65a3..8e78acfb 100644 Binary files a/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_001.png and b/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_001.png differ diff --git a/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_002.png b/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_002.png index f9d2eae7..28ae543b 100644 Binary files a/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_002.png and b/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_002.png differ diff --git a/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_003.png b/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_003.png index 88c53f11..3b87be04 100644 Binary files a/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_003.png and b/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_003.png differ diff --git a/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_004.png b/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_004.png index b9af2888..856c01b4 100644 Binary files a/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_004.png and b/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_004.png differ diff --git a/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_005.png b/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_005.png index 648e38a3..57179857 100644 Binary files a/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_005.png and b/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_005.png differ diff --git a/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_006.png b/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_006.png index 5b1f8bac..0b05b74d 100644 Binary files a/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_006.png and b/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_006.png differ diff --git a/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_007.png b/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_007.png index 6e385435..a3a6f3f4 100644 Binary files a/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_007.png and b/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_007.png differ diff --git a/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_008.png b/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_008.png index 97a04148..bf6245a1 100644 Binary files a/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_008.png and b/docs/auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_008.png differ diff --git a/docs/auto_examples/01_DataOperations/images/thumb/sphx_glr_plot_design_matrix_thumb.png 
b/docs/auto_examples/01_DataOperations/images/thumb/sphx_glr_plot_design_matrix_thumb.png index f256f40b..3e6d9cb6 100644 Binary files a/docs/auto_examples/01_DataOperations/images/thumb/sphx_glr_plot_design_matrix_thumb.png and b/docs/auto_examples/01_DataOperations/images/thumb/sphx_glr_plot_design_matrix_thumb.png differ diff --git a/docs/auto_examples/01_DataOperations/plot_design_matrix.ipynb b/docs/auto_examples/01_DataOperations/plot_design_matrix.ipynb index 19820921..6d348e83 100644 --- a/docs/auto_examples/01_DataOperations/plot_design_matrix.ipynb +++ b/docs/auto_examples/01_DataOperations/plot_design_matrix.ipynb @@ -15,7 +15,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "\nDesign Matrix Creation\n======================\n\nThis tutorial illustrates how to use the Design_Matrix class to flexibly create design matrices that can then be used with the Brain_Data class to perform univariate regression.\n\nDesign Matrices can be thought of as \"enhanced\" pandas dataframes; they can do everything a pandas dataframe is capable of, with some added features. Design Matrices follow a data organization format common in many machine learning applications such as the sci-kit learn API: 2d tables organized as observations by features. In the context of neuro-imaging this often translates to TRs by conditions of interest + nuisance covariates (1st level analysis), or participants by conditions/groups (2nd level analysis).\n\n\n" + "\nDesign Matrix\n==============\n\nThis tutorial illustrates how to use the Design_Matrix class to flexibly create design matrices that can then be used with the Brain_Data class to perform univariate regression.\n\nDesign Matrices can be thought of as \"enhanced\" pandas dataframes; they can do everything a pandas dataframe is capable of, with some added features. Design Matrices follow a data organization format common in many machine learning applications such as the sci-kit learn API: 2d tables organized as observations by features. In the context of neuro-imaging this often translates to TRs by conditions of interest + nuisance covariates (1st level analysis), or participants by conditions/groups (2nd level analysis).\n\n\n" ] }, { @@ -33,7 +33,7 @@ }, "outputs": [], "source": [ - "from nltools.data import Design_Matrix\nimport numpy as np\n\ndm = Design_Matrix(np.array([\n [1,0,0,0],\n [1,0,0,0],\n [0,0,0,0],\n [0,1,0,0],\n [0,1,0,0],\n [0,0,0,0],\n [0,0,1,0],\n [0,0,1,0],\n [0,0,0,0],\n [0,0,0,1],\n [0,0,0,1]\n ]),\n sampling_rate = 1.5,\n columns=['stim_A','stim_B','stim_C','stim_D']\n )" + "from nltools.data import Design_Matrix\nimport numpy as np\n\nTR = 1.5 # Design Matrices take a sampling_freq argument specified in hertz which can be converted as 1./TR\n\ndm = Design_Matrix(np.array([\n [0,0,0,0],\n [0,0,0,0],\n [1,0,0,0],\n [1,0,0,0],\n [0,0,0,0],\n [0,1,0,0],\n [0,1,0,0],\n [0,0,0,0],\n [0,0,1,0],\n [0,0,1,0],\n [0,0,0,0],\n [0,0,0,1],\n [0,0,0,1],\n [0,0,0,0],\n [0,0,0,0],\n [0,0,0,0],\n [0,0,0,0],\n [0,0,0,0],\n [0,0,0,0],\n [0,0,0,0],\n [0,0,0,0],\n [0,0,0,0]\n ]),\n sampling_freq = 1./TR,\n columns=['face_A','face_B','house_A','house_B']\n )" ] }, { @@ -94,7 +94,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "A common operation might include adding an intercept and polynomial trend terms (e.g. linear and quadtratic) as nuisance regressors. This is easy to do. Note that polynomial terms are normalized to unit variance (i.e. 
mean = 0, std = 1) before inclusion to keep values on approximately the same scale.\n\n" + "Adding nuisance covariates\n--------------------------\n\nLegendre Polynomials\n********************\n\nA common operation is adding an intercept and polynomial trend terms (e.g. linear and quadratic) as nuisance regressors. This is easy to do. Consistent with other software packages, these are orthogonal Legendre polynomials on the scale -1 to 1.\n\n" ] }, { @@ -105,7 +105,7 @@ }, "outputs": [], "source": [ - "# with include_lower = True (default), 1 here means: 0-intercept, 1-linear-trend, 2-quadtratic-trend\ndm_with_nuissance = dm.add_poly(2,include_lower=True)\ndm_with_nuissance.heatmap()" + "# with include_lower = True (default), 2 here means: 0-intercept, 1-linear-trend, 2-quadratic-trend\ndm_with_nuissance = dm.add_poly(2,include_lower=True)\ndm_with_nuissance.heatmap()" ] }, { @@ -130,7 +130,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Polynomial variables are not the only type of nuisance covariates that can be generate for you. Design Matrix also supports the creation of discrete-cosine basis functions ala SPM. This will create a series of filters added as new columns based on a specified duration, defaulting to 180s.\n\n" + "Discrete Cosine Basis Functions\n*******************************\n\nPolynomial variables are not the only type of nuisance covariates that can be generated for you. Design Matrix also supports the creation of discrete-cosine basis functions a la SPM. This will create a series of filters added as new columns based on a specified duration, defaulting to 180s. Let's create DCT filters for 20s durations in our toy data.\n\n" ] }, { @@ -141,14 +141,14 @@ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ - "# Short filter duration for our simple example\ndm_with_cosine = dm.add_dct_basis(duration=5)\nprint(dm_with_cosine.details())" + "# Short filter duration for our simple example\ndm_with_cosine = dm.add_dct_basis(duration=20)\ndm_with_cosine.heatmap()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Load and Manipulate an Onsets File\n-----------------------------------\n\nNltools provides basic file-reading support for 2 or 3 column formatted onset files. Users can look at the onsets_to_dm function as a template to build more complex file readers if desired or to see additional features. Nltools includes an example onsets file where each event lasted exactly 1 TR. Lets use that to create a design matrix with an intercept and linear trend\n\n" + "Data operations\n---------------\n\nPerforming convolution\n**********************\n\nDesign Matrix makes it easy to perform convolution and will auto-ignore all columns that are considered polynomials. The default convolution kernel is the Glover (1999) HRF parameterized by the glover_hrf implementation in nipy (see nltools.externals.hrf for details). However, any arbitrary kernel can be passed as a 1d numpy array, or multiple kernels can be passed as a 2d numpy array for highly flexible convolution across many types of data (e.g.
SCR).\n\n" ] }, { @@ -159,32 +159,21 @@ }, "outputs": [], "source": [ - "from nltools.utils import get_resource_path\nfrom nltools.file_reader import onsets_to_dm\nfrom nltools.data import Design_Matrix\nimport os\n\nonsetsFile = os.path.join(get_resource_path(),'onsets_example.txt')\ndm = onsets_to_dm(onsetsFile, TR=2.0, runLength=160, sort=True,add_poly=1)\ndm.heatmap()" + "dm_with_nuissance_c = dm_with_nuissance.convolve()\nprint(dm_with_nuissance_c.details())\ndm_with_nuissance_c.heatmap()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Design Matrix makes it easy to perform convolution and will auto-ignore all columns that are consider polynomials. By default it will use the one-parameter glover_hrf kernel (see nipy for details). However, any kernel can be passed as an argument, including a list of different kernels for highly flexible convolution across many types of data (e.g. SCR).\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "dm = dm.convolve()\nprint(dm.details())\ndm.heatmap()" + "Design Matrix can do many different data operations in addition to convolution such as upsampling and downsampling to different frequencies, zscoring, etc. Check out the API documentation for how to use these methods.\n\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Load and Z-score a Covariates File\n----------------------------------\n\nNow we're going to handle a covariates file that's been generated by a preprocessing routine. First we'll read in the text file using pandas and convert it to a design matrix.\n\n" + "File Reading\n------------\n\nCreating a Design Matrix from an onsets file\n********************************************\n\nNltools provides basic file-reading support for 2- or 3-column formatted onset files. Users can look at the onsets_to_dm function as a template to build more complex file readers if desired or to see additional features. Nltools includes an example onsets file where each event lasted exactly 1 TR and TR = 2s. Let's use that to create a design matrix with an intercept and linear trend.\n\n" ] }, { @@ -195,14 +184,14 @@ }, "outputs": [], "source": [ - "import pandas as pd\n\ncovariatesFile = os.path.join(get_resource_path(),'covariates_example.csv')\ncov = pd.read_csv(covariatesFile)\ncov = Design_Matrix(cov, sampling_rate = 2.0)\n# Design matrix uses seaborn's heatmap for plotting so excepts all keyword arguments\n# We're just adjusting colors here to visualize things a bit more nicely\ncov.heatmap(vmin=-1,vmax=1)" + "from nltools.utils import get_resource_path\nfrom nltools.file_reader import onsets_to_dm\nfrom nltools.data import Design_Matrix\nimport os\n\nTR = 2.0\nsampling_freq = 1./TR\nonsetsFile = os.path.join(get_resource_path(),'onsets_example.txt')\ndm = onsets_to_dm(onsetsFile, sampling_freq=sampling_freq, run_length=160, sort=True, add_poly=1)\ndm.heatmap()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Similar to adding polynomial terms, Design Matrix has multiple methods for data processing and transformation such as downsampling, upsampling, and z-scoring. Let's use the z-score method to normalize the covariates we just loaded.\n\n" + "Creating a Design Matrix from a generic csv file\n************************************************\n\nAlternatively you can read a generic csv file and transform it into a Design Matrix using pandas file reading capability.
Here we'll read in an example covariates file that contains the output of motion realignment estimated during an fMRI preprocessing pipeline.\n\n" ] }, { @@ -213,14 +202,14 @@ }, "outputs": [], "source": [ - "# Use pandas built-in fillna to fill NaNs in the covariates files introduced during the pre-processing pipeline, before z-scoring\n# Z-score takes an optional argument of which columns to z-score. Since we don't want to z-score any spikes, so let's select everything except that column\ncov = cov.fillna(0).zscore(cov.columns[:-1])\ncov.heatmap(vmin=-1,vmax=1)" + "import pandas as pd\n\ncovariatesFile = os.path.join(get_resource_path(),'covariates_example.csv')\ncov = pd.read_csv(covariatesFile)\ncov = Design_Matrix(cov, sampling_freq=sampling_freq)\ncov.heatmap(vmin=-1,vmax=1) # alter plot to scale of covs; heatmap takes Seaborn heatmap arguments" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Concatenate Multiple Design Matrices\n------------------------------------\n\nA really nice feature of Design Matrix is simplified, but intelligent matrix concatentation. Here it's trivial to horizontally concatenate our convolved onsets and covariates, while keeping our column names and order.\n\n" + "Working with multiple Design Matrices\n-------------------------------------\n\nVertically \"stacking\" Design Matrices\n*************************************\nA common task is creating a separate design matrix for multiple runs of an experiment (or multiple subjects) and vertically appending them to each other so that regression can be performed across all runs of an experiment. However, in order to account for run differences it's important (and common practice) to include separate run-wise polynomials (e.g. intercepts). Design Matrix's append method is intelligent and flexible enough to keep columns separated during appending automatically.\n\n" ] }, { @@ -231,14 +220,14 @@ }, "outputs": [], "source": [ - "full = dm.append(cov,axis=1)\nfull.heatmap(vmin=-1,vmax=1)" + "# Let's use the design matrix with polynomials from above\n# Stack \"run 1\" on top of \"run 2\"\nruns_1_and_2 = dm_with_nuissance.append(dm_with_nuissance,axis=0)\nruns_1_and_2.heatmap()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "But we can also intelligently vertically concatenate design matrices to handle say, different experimental runs, or participants. The method enables the user to indicate which columns to keep separated (if any) during concatenation or which to treat as extensions along the first dimension. By default the class will keep all polylnomial terms separated. This is extremely useful when building 1 large design matrix composed of several runs or participants with separate means.\n\n" + "Separating columns during append operations\n*******************************************\nNotice that all polynomials have been kept separated for you automatically and have been renamed to reflect the fact that they come from different runs. But Design Matrix is even more flexible. Let's say you want to estimate separate run-wise coefficients for all house stimuli too.
Simply pass that into the `unique_cols` parameter of append.\n\n" ] }, { @@ -249,14 +238,21 @@ }, "outputs": [], "source": [ - "dm2 = dm.append(dm, axis=0)\ndm2.heatmap(vmin=-1,vmax=1)" + "runs_1_and_2 = dm_with_nuissance.append(dm_with_nuissance,unique_cols=['house*'],axis=0)\nruns_1_and_2.heatmap()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now notice how all stimuli that begin with 'house' have been made into separate columns for each run. In general `unique_cols` can take a list of columns to keep separated or simple wildcards that either begin with a term (e.g. \"house*\") or end with one (e.g. \"*house\").\n\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Specific columns of interest can also be kept separate during concatenation (e.g. keeping run-wise spikes separate). As an example, we treat our first experimental regressor as different across our two design matrices. Notice that the class also preserves (as best as possible) column ordering.\n\n" + "Putting it all together\n-----------------------\n\nA realistic workflow\n********************\nLet's combine all the examples above to build a workflow for a realistic first-level fMRI analysis. This will include loading onsets from multiple experimental runs and concatenating them into a large multi-run design matrix where we estimate a single set of coefficients for our variables of interest, but make sure we account for run-wise differences in nuisance covariates (e.g. motion) and baseline trends, etc. For simplicity we'll just reuse the same onsets and covariates file multiple times.\n\n" ] }, { @@ -267,14 +263,21 @@ }, "outputs": [], "source": [ - "dm2 = dm.append(dm, axis=0, unique_cols=['BillyRiggins'])\ndm2.heatmap(vmin=-1,vmax=1)" + "num_runs = 4\nTR = 2.0\nsampling_freq = 1./TR\nall_runs = Design_Matrix(sampling_freq = sampling_freq)\nfor i in range(num_runs):\n\n    # 1) Load in onsets for this run\n    onsetsFile = os.path.join(get_resource_path(),'onsets_example.txt')\n    dm = onsets_to_dm(onsetsFile, sampling_freq=sampling_freq,run_length=160,sort=True)\n\n    # 2) Convolve them with the HRF\n    dm = dm.convolve()\n\n    # 3) Load in covariates for this run\n    covariatesFile = os.path.join(get_resource_path(),'covariates_example.csv')\n    cov = pd.read_csv(covariatesFile)\n    cov = Design_Matrix(cov, sampling_freq = sampling_freq)\n\n    # 4) In the covariates, fill any NaNs with 0, then add an intercept, a linear trend, and DCT basis functions\n    cov = cov.fillna(0)\n\n    # Retain a list of nuisance covariates (e.g. motion and spikes) which we'll also want to keep separate for each run\n    cov_columns = cov.columns\n    cov = cov.add_poly(1).add_dct_basis()\n\n    # 5) Join the onsets and covariates together\n    full = dm.append(cov,axis=1)\n\n    # 6) Append it to the master Design Matrix keeping things separated by run\n    all_runs = all_runs.append(full,axis=0,unique_cols=cov_columns)\n\nall_runs.heatmap(vmin=-1,vmax=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can see the leftmost columns of our multi-run design matrix contain our conditions of interest (stacked across all runs), the middle columns include separate run-wise nuisance covariates (motion, spikes), and the rightmost columns contain run-specific polynomials (intercept, trends, etc.).\n\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Design Matrix can also create polynomial terms and intelligently keep them separate during concatenation.
For example lets concatenate 4 design matrices and create separate 2nd order polynomials for all of them\n\n" + "Data Diagnostics\n----------------\n\nLet's actually check if our design is estimable. Design Matrix provides a few tools for cleaning up highly correlated columns (resulting in failure if trying to perform regression), replacing data, and computing collinearity. By default the `clean` method will drop any columns correlated at r >= .95.\n\n" ] }, { @@ -285,14 +288,21 @@ }, "outputs": [], "source": [ - "# Notice that append can take a list of Design Matrices in addition to just a single one\ndm_all = dm.append([dm,dm,dm], axis=0, add_poly=2)\ndm_all.heatmap(vmin=-1,vmax=1)" + "all_runs_cleaned = all_runs.clean(verbose=True)\nall_runs_cleaned.heatmap(vmin=-1,vmax=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Whoops, it looks like some of our polynomials and DCT basis functions above are highly correlated, but the clean method detected that and dropped them for us. In practice you'll often include either polynomials or DCT basis functions, not both, but this was just an illustrative example.\n\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Data Diagnostics\n----------------\n\nDesign Matrix also provides a few tools for cleaning up perfectly correlated columns (resulting in failure if trying to perform regression), replacing data, and computing collinearity.\n\n" + "Estimating a first-level model\n------------------------------\n\nYou can now set this multi-run Design Matrix as the `X` attribute of a Brain_Data object containing EPI data for these four runs and estimate a regression in just a few lines of code.\n\n" ] }, { @@ -303,7 +313,7 @@ }, "outputs": [], "source": [ - "# We have a good design here so no problems\ndm_all.clean(verbose=False)\ndm_all.vif()" + "# This code is commented because we don't actually have niftis loaded for the purposes of this tutorial\n# See the other tutorials for more details on working with nifti files and Brain_Data objects\n\n# Assuming you already loaded up Nifti images like this\n# list_of_niftis = ['run_1.nii.gz','run_2.nii.gz','run_3.nii.gz','run_4.nii.gz']\n# all_run_data = Brain_Data(list_of_niftis)\n\n# Set our Design Matrix to the X attribute of Brain_Data object\n# all_run_data.X = all_runs_cleaned\n\n# Run the regression\n# results = all_run_data.regress()\n\n# This will produce N beta, t, and p images\n# where N is the number of columns in the design matrix" ] } ], diff --git a/docs/auto_examples/01_DataOperations/plot_design_matrix.py b/docs/auto_examples/01_DataOperations/plot_design_matrix.py index c22947d9..cc4ac352 100644 --- a/docs/auto_examples/01_DataOperations/plot_design_matrix.py +++ b/docs/auto_examples/01_DataOperations/plot_design_matrix.py @@ -1,6 +1,6 @@ """ -Design Matrix Creation -====================== +Design Matrix +============== This tutorial illustrates how to use the Design_Matrix class to flexibly create design matrices that can then be used with the Brain_Data class to perform univariate regression.
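The tutorial above describes Design_Matrix as an "enhanced" pandas dataframe. A minimal sketch of what that means in practice, assuming only that nltools and numpy are installed; the column names mirror the tutorial's toy example, and whether a given pandas operation returns a Design_Matrix or a plain DataFrame is an assumption of the subclassing, not a documented guarantee:

.. code-block:: python

    import numpy as np
    from nltools.data import Design_Matrix

    # Rebuild the tutorial's toy matrix shape (sampling_freq is in Hz, i.e. 1/TR)
    TR = 1.5
    dm = Design_Matrix(np.zeros((22, 4), dtype=int),
                       sampling_freq=1./TR,
                       columns=['face_A', 'face_B', 'house_A', 'house_B'])

    # Ordinary pandas operations work because Design_Matrix subclasses DataFrame
    print(dm.shape)            # (22, 4)
    print(dm['face_A'].sum())  # column access behaves like any DataFrame
    print(dm.head())           # standard DataFrame preview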
@@ -17,7 +17,11 @@ from nltools.data import Design_Matrix import numpy as np +TR = 1.5 # Design Matrices take a sampling_freq argument specified in hertz which can be converted as 1./TR + dm = Design_Matrix(np.array([ + [0,0,0,0], + [0,0,0,0], [1,0,0,0], [1,0,0,0], [0,0,0,0], @@ -28,10 +32,19 @@ [0,0,1,0], [0,0,1,0], [0,0,0,0], [0,0,0,1], - [0,0,0,1] + [0,0,0,1], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0] ]), - sampling_rate = 1.5, - columns=['stim_A','stim_B','stim_C','stim_D'] + sampling_freq = 1./TR, + columns=['face_A','face_B','house_A','house_B'] ) ######################################################################### # Notice how this look exactly like a pandas dataframe. That's because design matrices are *subclasses* of dataframes with some extra attributes and methods. @@ -48,10 +61,17 @@ dm.heatmap() + ######################################################################### -# A common operation might include adding an intercept and polynomial trend terms (e.g. linear and quadtratic) as nuisance regressors. This is easy to do. Note that polynomial terms are normalized to unit variance (i.e. mean = 0, std = 1) before inclusion to keep values on approximately the same scale. +# Adding nuisance covariates +# -------------------------- +# +# Legendre Polynomials +# ******************** +# +# A common operation is adding an intercept and polynomial trend terms (e.g. linear and quadratic) as nuisance regressors. This is easy to do. Consistent with other software packages, these are orthogonal Legendre polynomials on the scale -1 to 1. -# with include_lower = True (default), 1 here means: 0-intercept, 1-linear-trend, 2-quadtratic-trend +# with include_lower = True (default), 2 here means: 0-intercept, 1-linear-trend, 2-quadratic-trend dm_with_nuissance = dm.add_poly(2,include_lower=True) dm_with_nuissance.heatmap() @@ -61,91 +81,162 @@ print(dm_with_nuissance.details()) ######################################################################### -# Polynomial variables are not the only type of nuisance covariates that can be generate for you. Design Matrix also supports the creation of discrete-cosine basis functions ala SPM. This will create a series of filters added as new columns based on a specified duration, defaulting to 180s. +# Discrete Cosine Basis Functions +# ******************************* +# +# Polynomial variables are not the only type of nuisance covariates that can be generated for you. Design Matrix also supports the creation of discrete-cosine basis functions a la SPM. This will create a series of filters added as new columns based on a specified duration, defaulting to 180s. Let's create DCT filters for 20s durations in our toy data. # Short filter duration for our simple example -dm_with_cosine = dm.add_dct_basis(duration=5) -print(dm_with_cosine.details()) +dm_with_cosine = dm.add_dct_basis(duration=20) +dm_with_cosine.heatmap() ######################################################################### -# Load and Manipulate an Onsets File -# ----------------------------------- +# Data operations +# --------------- +# +# Performing convolution +# ********************** # -# Nltools provides basic file-reading support for 2 or 3 column formatted onset files. Users can look at the onsets_to_dm function as a template to build more complex file readers if desired or to see additional features. Nltools includes an example onsets file where each event lasted exactly 1 TR.
Lets use that to create a design matrix with an intercept and linear trend +# Design Matrix makes it easy to perform convolution and will auto-ignore all columns that are considered polynomials. The default convolution kernel is the Glover (1999) HRF parameterized by the glover_hrf implementation in nipy (see nltools.externals.hrf for details). However, any arbitrary kernel can be passed as a 1d numpy array, or multiple kernels can be passed as a 2d numpy array for highly flexible convolution across many types of data (e.g. SCR). + +dm_with_nuissance_c = dm_with_nuissance.convolve() +print(dm_with_nuissance_c.details()) +dm_with_nuissance_c.heatmap() + +######################################################################### +# Design Matrix can do many different data operations in addition to convolution such as upsampling and downsampling to different frequencies, zscoring, etc. Check out the API documentation for how to use these methods. + +######################################################################### +# File Reading +# ------------ +# +# Creating a Design Matrix from an onsets file +# ******************************************** +# +# Nltools provides basic file-reading support for 2- or 3-column formatted onset files. Users can look at the onsets_to_dm function as a template to build more complex file readers if desired or to see additional features. Nltools includes an example onsets file where each event lasted exactly 1 TR and TR = 2s. Let's use that to create a design matrix with an intercept and linear trend. from nltools.utils import get_resource_path from nltools.file_reader import onsets_to_dm from nltools.data import Design_Matrix import os +TR = 2.0 +sampling_freq = 1./TR onsetsFile = os.path.join(get_resource_path(),'onsets_example.txt') -dm = onsets_to_dm(onsetsFile, TR=2.0, runLength=160, sort=True,add_poly=1) -dm.heatmap() - -######################################################################### -# Design Matrix makes it easy to perform convolution and will auto-ignore all columns that are consider polynomials. By default it will use the one-parameter glover_hrf kernel (see nipy for details). However, any kernel can be passed as an argument, including a list of different kernels for highly flexible convolution across many types of data (e.g. SCR). - -dm = dm.convolve() -print(dm.details()) +dm = onsets_to_dm(onsetsFile, sampling_freq=sampling_freq, run_length=160, sort=True, add_poly=1) dm.heatmap() ######################################################################### -# Load and Z-score a Covariates File -# ---------------------------------- +# Creating a Design Matrix from a generic csv file +# ************************************************ # -# Now we're going to handle a covariates file that's been generated by a preprocessing routine. First we'll read in the text file using pandas and convert it to a design matrix. +# Alternatively you can read a generic csv file and transform it into a Design Matrix using pandas file reading capability. Here we'll read in an example covariates file that contains the output of motion realignment estimated during an fMRI preprocessing pipeline.
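The data-operations note above mentions z-scoring without demonstrating it. Once a covariates Design_Matrix like the one loaded next exists, the pattern removed elsewhere in this patch still illustrates the idea; treat the column-selection argument as that older usage rather than a documented contract:

.. code-block:: python

    # Sketch: fill NaNs introduced during preprocessing, then z-score every
    # covariate column except the last (e.g. a spike regressor), mirroring the
    # removed example cov.fillna(0).zscore(cov.columns[:-1])
    cov = cov.fillna(0).zscore(cov.columns[:-1])
    cov.heatmap(vmin=-1, vmax=1)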
import pandas as pd covariatesFile = os.path.join(get_resource_path(),'covariates_example.csv') cov = pd.read_csv(covariatesFile) -cov = Design_Matrix(cov, sampling_rate = 2.0) -# Design matrix uses seaborn's heatmap for plotting so excepts all keyword arguments -# We're just adjusting colors here to visualize things a bit more nicely -cov.heatmap(vmin=-1,vmax=1) +cov = Design_Matrix(cov, sampling_freq=sampling_freq) +cov.heatmap(vmin=-1,vmax=1) # alter plot to scale of covs; heatmap takes Seaborn heatmap arguments ######################################################################### -# Similar to adding polynomial terms, Design Matrix has multiple methods for data processing and transformation such as downsampling, upsampling, and z-scoring. Let's use the z-score method to normalize the covariates we just loaded. +# Working with multiple Design Matrices +# ------------------------------------- +# +# Vertically "stacking" Design Matrices +# ************************************* +# A common task is creating a separate design matrix for multiple runs of an experiment (or multiple subjects) and vertically appending them to each other so that regression can be performed across all runs of an experiment. However, in order to account for run differences it's important (and common practice) to include separate run-wise polynomials (e.g. intercepts). Design Matrix's append method is intelligent and flexible enough to keep columns separated during appending automatically. -# Use pandas built-in fillna to fill NaNs in the covariates files introduced during the pre-processing pipeline, before z-scoring -# Z-score takes an optional argument of which columns to z-score. Since we don't want to z-score any spikes, so let's select everything except that column -cov = cov.fillna(0).zscore(cov.columns[:-1]) -cov.heatmap(vmin=-1,vmax=1) +# Let's use the design matrix with polynomials from above +# Stack "run 1" on top of "run 2" +runs_1_and_2 = dm_with_nuissance.append(dm_with_nuissance,axis=0) +runs_1_and_2.heatmap() ######################################################################### -# Concatenate Multiple Design Matrices -# ------------------------------------ -# -# A really nice feature of Design Matrix is simplified, but intelligent matrix concatentation. Here it's trivial to horizontally concatenate our convolved onsets and covariates, while keeping our column names and order. +# Separating columns during append operations +# ******************************************* +# Notice that all polynomials have been kept separated for you automatically and have been renamed to reflect the fact that they come from different runs. But Design Matrix is even more flexible. Let's say you want to estimate separate run-wise coefficients for all house stimuli too. Simply pass that into the `unique_cols` parameter of append. -full = dm.append(cov,axis=1) -full.heatmap(vmin=-1,vmax=1) +runs_1_and_2 = dm_with_nuissance.append(dm_with_nuissance,unique_cols=['house*'],axis=0) +runs_1_and_2.heatmap() ######################################################################### -# But we can also intelligently vertically concatenate design matrices to handle say, different experimental runs, or participants. The method enables the user to indicate which columns to keep separated (if any) during concatenation or which to treat as extensions along the first dimension. By default the class will keep all polylnomial terms separated.
This is extremely useful when building 1 large design matrix composed of several runs or participants with separate means. - -dm2 = dm.append(dm, axis=0) -dm2.heatmap(vmin=-1,vmax=1) +# Now notice how all stimuli that begin with 'house' have been made into separate columns for each run. In general `unique_cols` can take a list of columns to keep separated or simple wildcards that either begin with a term (e.g. "house*") or end with one (e.g. "*house"). ######################################################################### -# Specific columns of interest can also be kept separate during concatenation (e.g. keeping run-wise spikes separate). As an example, we treat our first experimental regressor as different across our two design matrices. Notice that the class also preserves (as best as possible) column ordering. +# Putting it all together +# ----------------------- +# +# A realistic workflow +# ******************** +# Let's combine all the examples above to build a workflow for a realistic first-level fMRI analysis. This will include loading onsets from multiple experimental runs and concatenating them into a large multi-run design matrix where we estimate a single set of coefficients for our variables of interest, but make sure we account for run-wise differences in nuisance covariates (e.g. motion) and baseline trends, etc. For simplicity we'll just reuse the same onsets and covariates file multiple times. -dm2 = dm.append(dm, axis=0, unique_cols=['BillyRiggins']) -dm2.heatmap(vmin=-1,vmax=1) +num_runs = 4 +TR = 2.0 +sampling_freq = 1./TR +all_runs = Design_Matrix(sampling_freq = sampling_freq) +for i in range(num_runs): ######################################################################### -# Design Matrix can also create polynomial terms and intelligently keep them separate during concatenation. For example lets concatenate 4 design matrices and create separate 2nd order polynomials for all of them + # 1) Load in onsets for this run + onsetsFile = os.path.join(get_resource_path(),'onsets_example.txt') + dm = onsets_to_dm(onsetsFile, sampling_freq=sampling_freq,run_length=160,sort=True) + + # 2) Convolve them with the HRF + dm = dm.convolve() + + # 3) Load in covariates for this run + covariatesFile = os.path.join(get_resource_path(),'covariates_example.csv') + cov = pd.read_csv(covariatesFile) + cov = Design_Matrix(cov, sampling_freq = sampling_freq) + + # 4) In the covariates, fill any NaNs with 0, then add an intercept, a linear trend, and DCT basis functions + cov = cov.fillna(0) + + # Retain a list of nuisance covariates (e.g. motion and spikes) which we'll also want to keep separate for each run + cov_columns = cov.columns + cov = cov.add_poly(1).add_dct_basis() + + # 5) Join the onsets and covariates together + full = dm.append(cov,axis=1) -# Notice that append can take a list of Design Matrices in addition to just a single one -dm_all = dm.append([dm,dm,dm], axis=0, add_poly=2) -dm_all.heatmap(vmin=-1,vmax=1) + # 6) Append it to the master Design Matrix keeping things separated by run + all_runs = all_runs.append(full,axis=0,unique_cols=cov_columns) + +all_runs.heatmap(vmin=-1,vmax=1) + +######################################################################### +# We can see the leftmost columns of our multi-run design matrix contain our conditions of interest (stacked across all runs), the middle columns include separate run-wise nuisance covariates (motion, spikes), and the rightmost columns contain run-specific polynomials (intercept, trends, etc.).
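Before cleaning, collinearity can also be inspected directly. The vif() call below is taken from the code this patch removes (dm_all.vif()), so its exact behavior on the appended multi-run matrix is an assumption:

.. code-block:: python

    # Sketch: variance inflation factors for the multi-run design; how vif()
    # treats the run-wise polynomial columns here is assumed, not verified
    vifs = all_runs.vif()
    print(vifs)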
######################################################################### # Data Diagnostics # ---------------- # -# Design Matrix also provides a few tools for cleaning up perfectly correlated columns (resulting in failure if trying to perform regression), replacing data, and computing collinearity. +# Let's actually check if our design is estimable. Design Matrix provides a few tools for cleaning up highly correlated columns (resulting in failure if trying to perform regression), replacing data, and computing collinearity. By default the `clean` method will drop any columns correlated at r >= .95. + +all_runs_cleaned = all_runs.clean(verbose=True) +all_runs_cleaned.heatmap(vmin=-1,vmax=1) + +######################################################################### +# Whoops, it looks like some of our polynomials and DCT basis functions above are highly correlated, but the clean method detected that and dropped them for us. In practice you'll often include either polynomials or DCT basis functions, not both, but this was just an illustrative example. + +######################################################################### +# Estimating a first-level model +# ------------------------------ +# +# You can now set this multi-run Design Matrix as the `X` attribute of a Brain_Data object containing EPI data for these four runs and estimate a regression in just a few lines of code. + +# This code is commented because we don't actually have niftis loaded for the purposes of this tutorial +# See the other tutorials for more details on working with nifti files and Brain_Data objects + +# Assuming you already loaded up Nifti images like this +# list_of_niftis = ['run_1.nii.gz','run_2.nii.gz','run_3.nii.gz','run_4.nii.gz'] +# all_run_data = Brain_Data(list_of_niftis) + +# Set our Design Matrix to the X attribute of Brain_Data object +# all_run_data.X = all_runs_cleaned + +# Run the regression +# results = all_run_data.regress() -# We have a good design here so no problems -dm_all.clean(verbose=False) -dm_all.vif() +# This will produce N beta, t, and p images +# where N is the number of columns in the design matrix diff --git a/docs/auto_examples/01_DataOperations/plot_design_matrix.py.md5 b/docs/auto_examples/01_DataOperations/plot_design_matrix.py.md5 index 91f275b9..869cb99a 100644 --- a/docs/auto_examples/01_DataOperations/plot_design_matrix.py.md5 +++ b/docs/auto_examples/01_DataOperations/plot_design_matrix.py.md5 @@ -1 +1 @@ -674c1c7e55385d8ebc2e109812f5c3b8 \ No newline at end of file +d25dc9c6791965dca7df05760d6549bf \ No newline at end of file diff --git a/docs/auto_examples/01_DataOperations/plot_design_matrix.rst b/docs/auto_examples/01_DataOperations/plot_design_matrix.rst index f4cd77d0..bd4abb81 100644 --- a/docs/auto_examples/01_DataOperations/plot_design_matrix.rst +++ b/docs/auto_examples/01_DataOperations/plot_design_matrix.rst @@ -3,8 +3,8 @@ .. _sphx_glr_auto_examples_01_DataOperations_plot_design_matrix.py: -Design Matrix Creation -====================== +Design Matrix +============== This tutorial illustrates how to use the Design_Matrix class to flexibly create design matrices that can then be used with the Brain_Data class to perform univariate regression.
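Written out as runnable code, the commented regression steps shown earlier would look roughly like the sketch below; the nifti filenames are the tutorial's placeholders rather than files shipped with nltools:

.. code-block:: python

    from nltools.data import Brain_Data

    # Placeholder filenames copied from the commented example above
    list_of_niftis = ['run_1.nii.gz', 'run_2.nii.gz', 'run_3.nii.gz', 'run_4.nii.gz']
    all_run_data = Brain_Data(list_of_niftis)

    # Attach the cleaned multi-run design and estimate the model
    all_run_data.X = all_runs_cleaned
    results = all_run_data.regress()  # beta, t, and p images, one per design column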
@@ -25,7 +25,11 @@ Lets just create a basic toy design matrix by hand corresponding to a single par from nltools.data import Design_Matrix import numpy as np + TR = 1.5 # Design Matrices take a sampling_freq argument specified in hertz which can be converted as 1./TR + dm = Design_Matrix(np.array([ + [0,0,0,0], + [0,0,0,0], [1,0,0,0], [1,0,0,0], [0,0,0,0], @@ -36,10 +40,19 @@ Lets just create a basic toy design matrix by hand corresponding to a single par [0,0,1,0], [0,0,0,0], [0,0,0,1], - [0,0,0,1] + [0,0,0,1], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0] ]), - sampling_rate = 1.5, - columns=['stim_A','stim_B','stim_C','stim_D'] + sampling_freq = 1./TR, + columns=['face_A','face_B','house_A','house_B'] ) @@ -64,18 +77,29 @@ Notice how this look exactly like a pandas dataframe. That's because design matr Out:: - stim_A stim_B stim_C stim_D - 0 1 0 0 0 - 1 1 0 0 0 - 2 0 0 0 0 - 3 0 1 0 0 - 4 0 1 0 0 - 5 0 0 0 0 - 6 0 0 1 0 - 7 0 0 1 0 - 8 0 0 0 0 - 9 0 0 0 1 - 10 0 0 0 1 + face_A face_B house_A house_B + 0 0 0 0 0 + 1 0 0 0 0 + 2 1 0 0 0 + 3 1 0 0 0 + 4 0 0 0 0 + 5 0 1 0 0 + 6 0 1 0 0 + 7 0 0 0 0 + 8 0 0 1 0 + 9 0 0 1 0 + 10 0 0 0 0 + 11 0 0 0 1 + 12 0 0 0 1 + 13 0 0 0 0 + 14 0 0 0 0 + 15 0 0 0 0 + 16 0 0 0 0 + 17 0 0 0 0 + 18 0 0 0 0 + 19 0 0 0 0 + 20 0 0 0 0 + 21 0 0 0 0 Let's take a look at some of that meta-data. We can see that no columns have been convolved as of yet and this design matrix has no polynomial terms (e.g. such as an intercept or linear trend). @@ -95,7 +119,7 @@ Let's take a look at some of that meta-data. We can see that no columns have bee Out:: - nltools.data.design_matrix.Design_Matrix(sampling_rate=1.5, shape=(11, 4), convolved=[], polynomials=[]) + nltools.data.design_matrix.Design_Matrix(sampling_freq=0.6666666666666666 (hz), shape=(22, 4), multi=False, convolved=[], polynomials=[]) We can also easily visualize the design matrix using an SPM/AFNI/FSL style heatmap @@ -110,20 +134,27 @@ We can also easily visualize the design matrix using an SPM/AFNI/FSL style heatm + .. image:: /auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_001.png :align: center -A common operation might include adding an intercept and polynomial trend terms (e.g. linear and quadtratic) as nuisance regressors. This is easy to do. Note that polynomial terms are normalized to unit variance (i.e. mean = 0, std = 1) before inclusion to keep values on approximately the same scale. +Adding nuisance covariates +-------------------------- + +Legendre Polynomials +******************** + +A common operation is adding an intercept and polynomial trend terms (e.g. linear and quadratic) as nuisance regressors. This is easy to do. Consistent with other software packages, these are orthogonal Legendre polynomials on the scale -1 to 1. .. code-block:: python - # with include_lower = True (default), 1 here means: 0-intercept, 1-linear-trend, 2-quadtratic-trend + # with include_lower = True (default), 2 here means: 0-intercept, 1-linear-trend, 2-quadratic-trend dm_with_nuissance = dm.add_poly(2,include_lower=True) dm_with_nuissance.heatmap() @@ -153,10 +184,13 @@ We can see that 3 new columns were added to the design matrix.
We can also inspe Out:: - nltools.data.design_matrix.Design_Matrix(sampling_rate=1.5, shape=(11, 7), convolved=[], polynomials=['intercept', 'poly_1', 'poly_2']) + nltools.data.design_matrix.Design_Matrix(sampling_freq=0.6666666666666666 (hz), shape=(22, 7), multi=False, convolved=[], polynomials=['poly_0', 'poly_1', 'poly_2']) -Polynomial variables are not the only type of nuisance covariates that can be generate for you. Design Matrix also supports the creation of discrete-cosine basis functions ala SPM. This will create a series of filters added as new columns based on a specified duration, defaulting to 180s. +Discrete Cosine Basis Functions +******************************* + +Polynomial variables are not the only type of nuisance covariates that can be generated for you. Design Matrix also supports the creation of discrete-cosine basis functions a la SPM. This will create a series of filters added as new columns based on a specified duration, defaulting to 180s. Let's create DCT filters for 20s durations in our toy data. @@ -164,77 +198,89 @@ Polynomial variables are not the only type of nuisance covariates that can be ge # Short filter duration for our simple example - dm_with_cosine = dm.add_dct_basis(duration=5) - print(dm_with_cosine.details()) + dm_with_cosine = dm.add_dct_basis(duration=20) + dm_with_cosine.heatmap() +.. image:: /auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_003.png + :align: center -.. rst-class:: sphx-glr-script-out - Out:: - nltools.data.design_matrix.Design_Matrix(sampling_rate=1.5, shape=(11, 10), convolved=[], polynomials=['cosine_1', 'cosine_2', 'cosine_3', 'cosine_4', 'cosine_5', 'cosine_6']) +Data operations +--------------- -Load and Manipulate an Onsets File ----------------------------------- +Performing convolution +********************** -Nltools provides basic file-reading support for 2 or 3 column formatted onset files. Users can look at the onsets_to_dm function as a template to build more complex file readers if desired or to see additional features. Nltools includes an example onsets file where each event lasted exactly 1 TR. Lets use that to create a design matrix with an intercept and linear trend +Design Matrix makes it easy to perform convolution and will auto-ignore all columns that are considered polynomials. The default convolution kernel is the Glover (1999) HRF parameterized by the glover_hrf implementation in nipy (see nltools.externals.hrf for details). However, any arbitrary kernel can be passed as a 1d numpy array, or multiple kernels can be passed as a 2d numpy array for highly flexible convolution across many types of data (e.g. SCR). .. code-block:: python - from nltools.utils import get_resource_path - from nltools.file_reader import onsets_to_dm - from nltools.data import Design_Matrix - import os - - onsetsFile = os.path.join(get_resource_path(),'onsets_example.txt') - dm = onsets_to_dm(onsetsFile, TR=2.0, runLength=160, sort=True,add_poly=1) - dm.heatmap() + dm_with_nuissance_c = dm_with_nuissance.convolve() + print(dm_with_nuissance_c.details()) + dm_with_nuissance_c.heatmap() -.. image:: /auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_003.png +.. image:: /auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_004.png :align: center +..
rst-class:: sphx-glr-script-out + + Out:: + + nltools.data.design_matrix.Design_Matrix(sampling_freq=0.6666666666666666 (hz), shape=(22, 7), multi=False, convolved=['face_A', 'face_B', 'house_A', 'house_B'], polynomials=['poly_0', 'poly_1', 'poly_2']) + + +Design Matrix can do many different data operations in addition to convolution such as upsampling and downsampling to different frequencies, zscoring, etc. Check out the API documentation for how to use these methods. -Design Matrix makes it easy to perform convolution and will auto-ignore all columns that are consider polynomials. By default it will use the one-parameter glover_hrf kernel (see nipy for details). However, any kernel can be passed as an argument, including a list of different kernels for highly flexible convolution across many types of data (e.g. SCR). +File Reading +------------ + +Creating a Design Matrix from an onsets file +******************************************** + +Nltools provides basic file-reading support for 2- or 3-column formatted onset files. Users can look at the onsets_to_dm function as a template to build more complex file readers if desired or to see additional features. Nltools includes an example onsets file where each event lasted exactly 1 TR and TR = 2s. Let's use that to create a design matrix with an intercept and linear trend. .. code-block:: python - dm = dm.convolve() - print(dm.details()) + from nltools.utils import get_resource_path + from nltools.file_reader import onsets_to_dm + from nltools.data import Design_Matrix + import os + + TR = 2.0 + sampling_freq = 1./TR + onsetsFile = os.path.join(get_resource_path(),'onsets_example.txt') + dm = onsets_to_dm(onsetsFile, sampling_freq=sampling_freq, run_length=160, sort=True, add_poly=1) dm.heatmap() -.. image:: /auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_004.png +.. image:: /auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_005.png :align: center -.. rst-class:: sphx-glr-script-out - - Out:: - - nltools.data.design_matrix.Design_Matrix(sampling_rate=2.0, shape=(160, 15), convolved=['BillyRiggins', 'BuddyGarrity', 'CoachTaylor', 'GrandmaSaracen', 'JasonStreet', 'JulieTaylor', 'LandryClarke', 'LylaGarrity', 'MattSaracen', 'SmashWilliams', 'TamiTaylor', 'TimRiggins', 'TyraCollette'], polynomials=['intercept', 'poly_1']) -Load and Z-score a Covariates File ---------------------------------- +Creating a Design Matrix from a generic csv file +************************************************ -Now we're going to handle a covariates file that's been generated by a preprocessing routine. First we'll read in the text file using pandas and convert it to a design matrix. +Alternatively you can read a generic csv file and transform it into a Design Matrix using pandas file reading capability. Here we'll read in an example covariates file that contains the output of motion realignment estimated during an fMRI preprocessing pipeline.
@@ -245,91 +291,112 @@ Now we're going to handle a covariates file that's been generated by a preproces covariatesFile = os.path.join(get_resource_path(),'covariates_example.csv') cov = pd.read_csv(covariatesFile) - cov = Design_Matrix(cov, sampling_rate = 2.0) - # Design matrix uses seaborn's heatmap for plotting so excepts all keyword arguments - # We're just adjusting colors here to visualize things a bit more nicely - cov.heatmap(vmin=-1,vmax=1) + cov = Design_Matrix(cov, sampling_freq=sampling_freq) + cov.heatmap(vmin=-1,vmax=1) # alter plot to scale of covs; heatmap takes Seaborn heatmap arguments -.. image:: /auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_005.png +.. image:: /auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_006.png :align: center -Similar to adding polynomial terms, Design Matrix has multiple methods for data processing and transformation such as downsampling, upsampling, and z-scoring. Let's use the z-score method to normalize the covariates we just loaded. +Working with multiple Design Matrices +------------------------------------- + +Vertically "stacking" Design Matrices +************************************* +A common task is creating a separate design matrix for multiple runs of an experiment (or multiple subjects) and vertically appending them to each other so that regression can be performed across all runs of an experiment. However, in order to account for run differences it's important (and common practice) to include separate run-wise polynomials (e.g. intercepts). Design Matrix's append method is intelligent and flexible enough to keep columns separated during appending automatically. .. code-block:: python - # Use pandas built-in fillna to fill NaNs in the covariates files introduced during the pre-processing pipeline, before z-scoring - # Z-score takes an optional argument of which columns to z-score. Since we don't want to z-score any spikes, so let's select everything except that column - cov = cov.fillna(0).zscore(cov.columns[:-1]) - cov.heatmap(vmin=-1,vmax=1) + # Let's use the design matrix with polynomials from above + # Stack "run 1" on top of "run 2" + runs_1_and_2 = dm_with_nuissance.append(dm_with_nuissance,axis=0) + runs_1_and_2.heatmap() -.. image:: /auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_006.png +.. image:: /auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_007.png :align: center -Concatenate Multiple Design Matrices ------------------------------------- -A really nice feature of Design Matrix is simplified, but intelligent matrix concatentation. Here it's trivial to horizontally concatenate our convolved onsets and covariates, while keeping our column names and order. +Separating columns during append operations +******************************************* +Notice that all polynomials have been kept separated for you automatically and have been renamed to reflect the fact that they come from different runs. But Design Matrix is even more flexible. Let's say you want to estimate separate run-wise coefficients for all house stimuli too. Simply pass that into the `unique_cols` parameter of append. .. code-block:: python - full = dm.append(cov,axis=1) - full.heatmap(vmin=-1,vmax=1) + runs_1_and_2 = dm_with_nuissance.append(dm_with_nuissance,unique_cols=['house*'],axis=0) + runs_1_and_2.heatmap() -.. image:: /auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_007.png +..
image:: /auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_008.png :align: center -But we can also intelligently vertically concatenate design matrices to handle say, different experimental runs, or participants. The method enables the user to indicate which columns to keep separated (if any) during concatenation or which to treat as extensions along the first dimension. By default the class will keep all polylnomial terms separated. This is extremely useful when building 1 large design matrix composed of several runs or participants with separate means. +Now notice how all stimuli that begin with 'house' have been made into separate columns for each run. In general `unique_cols` can take a list of columns to keep separated or simple wildcards that either begin with a term (e.g. "house*") or end with one (e.g. "*house"). +Putting it all together +----------------------- -.. code-block:: python - - dm2 = dm.append(dm, axis=0) - dm2.heatmap(vmin=-1,vmax=1) +A realistic workflow +******************** +Let's combine all the examples above to build a workflow for a realistic first-level fMRI analysis. This will include loading onsets from multiple experimental runs and concatenating them into a large multi-run design matrix where we estimate a single set of coefficients for our variables of interest, but make sure we account for run-wise differences in nuisance covariates (e.g. motion) and baseline trends, etc. For simplicity we'll just reuse the same onsets and covariates file multiple times. +.. code-block:: python -.. image:: /auto_examples/01_DataOperations/images/sphx_glr_plot_design_matrix_008.png - :align: center + num_runs = 4 + TR = 2.0 + sampling_freq = 1./TR + all_runs = Design_Matrix(sampling_freq = sampling_freq) + for i in range(num_runs): + # 1) Load in onsets for this run + onsetsFile = os.path.join(get_resource_path(),'onsets_example.txt') + dm = onsets_to_dm(onsetsFile, sampling_freq=sampling_freq,run_length=160,sort=True) + # 2) Convolve them with the HRF + dm = dm.convolve() -Specific columns of interest can also be kept separate during concatenation (e.g. keeping run-wise spikes separate). As an example, we treat our first experimental regressor as different across our two design matrices. Notice that the class also preserves (as best as possible) column ordering. + # 3) Load in covariates for this run + covariatesFile = os.path.join(get_resource_path(),'covariates_example.csv') + cov = pd.read_csv(covariatesFile) + cov = Design_Matrix(cov, sampling_freq = sampling_freq) + # 4) In the covariates, fill any NaNs with 0, then add an intercept, a linear trend, and DCT basis functions + cov = cov.fillna(0) + # Retain a list of nuisance covariates (e.g. motion and spikes) which we'll also want to keep separate for each run + cov_columns = cov.columns + cov = cov.add_poly(1).add_dct_basis() -.. code-block:: python + # 5) Join the onsets and covariates together + full = dm.append(cov,axis=1) + # 6) Append it to the master Design Matrix keeping things separated by run + all_runs = all_runs.append(full,axis=0,unique_cols=cov_columns) - dm2 = dm.append(dm, axis=0, unique_cols=['BillyRiggins']) - dm2.heatmap(vmin=-1,vmax=1) + all_runs.heatmap(vmin=-1,vmax=1) @@ -340,16 +407,21 @@ Specific columns of interest can also be kept separate during concatenation (e.g -Design Matrix can also create polynomial terms and intelligently keep them separate during concatenation.
For example lets concatenate 4 design matrices and create separate 2nd order polynomials for all of them +We can see the leftmost columns of our multi-run design matrix contain our conditions of interest (stacked across all runs), the middle columns include separate run-wise nuisance covariates (motion, spikes), and the rightmost columns contain run-specific polynomials (intercept, trends, etc.). + + +Data Diagnostics +---------------- + +Let's actually check if our design is estimable. Design Matrix provides a few tools for cleaning up highly correlated columns (resulting in failure if trying to perform regression), replacing data, and computing collinearity. By default the `clean` method will drop any columns correlated at r >= .95. .. code-block:: python - # Notice that append can take a list of Design Matrices in addition to just a single one - dm_all = dm.append([dm,dm,dm], axis=0, add_poly=2) - dm_all.heatmap(vmin=-1,vmax=1) + all_runs_cleaned = all_runs.clean(verbose=True) + all_runs_cleaned.heatmap(vmin=-1,vmax=1) @@ -362,41 +434,47 @@ Design Matrix can also create polynomial terms and intelligently keep them separ Out:: - Design Matrix already has intercept...skipping - Design Matrix already has 1th order polynomial...skipping - Design Matrix already has intercept...skipping - Design Matrix already has 1th order polynomial...skipping - Design Matrix already has intercept...skipping - Design Matrix already has 1th order polynomial...skipping - Design Matrix already has intercept...skipping - Design Matrix already has 1th order polynomial...skipping + 0_poly_1 and 0_cosine_1 correlated at 0.99 which is >= threshold of 0.95. Dropping 0_cosine_1 + 1_poly_1 and 1_cosine_1 correlated at 0.99 which is >= threshold of 0.95. Dropping 1_cosine_1 + 2_poly_1 and 2_cosine_1 correlated at 0.99 which is >= threshold of 0.95. Dropping 2_cosine_1 + 3_poly_1 and 3_cosine_1 correlated at 0.99 which is >= threshold of 0.95. Dropping 3_cosine_1 -Data Diagnostics ----------------- +Whoops, it looks like some of our polynomials and DCT basis functions above are highly correlated, but the clean method detected that and dropped them for us. In practice you'll often include either polynomials or DCT basis functions, not both, but this was just an illustrative example. + + +Estimating a first-level model +------------------------------ -Design Matrix also provides a few tools for cleaning up perfectly correlated columns (resulting in failure if trying to perform regression), replacing data, and computing collinearity. +You can now set this multi-run Design Matrix as the `X` attribute of a Brain_Data object containing EPI data for these four runs and estimate a regression in just a few lines of code. .. code-block:: python - # We have a good design here so no problems - dm_all.clean(verbose=False) - dm_all.vif() + # This code is commented because we don't actually have niftis loaded for the purposes of this tutorial + # See the other tutorials for more details on working with nifti files and Brain_Data objects + # Assuming you already loaded up Nifti images like this + # list_of_niftis = ['run_1.nii.gz','run_2.nii.gz','run_3.nii.gz','run_4.nii.gz'] + # all_run_data = Brain_Data(list_of_niftis) + # Set our Design Matrix to the X attribute of Brain_Data object + # all_run_data.X = all_runs_cleaned + + # Run the regression + # results = all_run_data.regress() + + # This will produce N beta, t, and p images + # where N is the number of columns in the design matrix -..
rst-class:: sphx-glr-script-out - Out:: - Dropping columns not needed...skipping -**Total running time of the script:** ( 0 minutes 1.402 seconds) +**Total running time of the script:** ( 0 minutes 2.540 seconds) diff --git a/docs/auto_examples/01_DataOperations/plot_design_matrix_codeobj.pickle b/docs/auto_examples/01_DataOperations/plot_design_matrix_codeobj.pickle index ea62ebef..280911a6 100644 Binary files a/docs/auto_examples/01_DataOperations/plot_design_matrix_codeobj.pickle and b/docs/auto_examples/01_DataOperations/plot_design_matrix_codeobj.pickle differ diff --git a/docs/auto_examples/01_DataOperations/plot_mask_codeobj.pickle b/docs/auto_examples/01_DataOperations/plot_mask_codeobj.pickle index a0f37557..fc7a7e28 100644 Binary files a/docs/auto_examples/01_DataOperations/plot_mask_codeobj.pickle and b/docs/auto_examples/01_DataOperations/plot_mask_codeobj.pickle differ diff --git a/docs/auto_examples/01_DataOperations/plot_neurovault_io_codeobj.pickle b/docs/auto_examples/01_DataOperations/plot_neurovault_io_codeobj.pickle index 6c283062..3852af7b 100644 Binary files a/docs/auto_examples/01_DataOperations/plot_neurovault_io_codeobj.pickle and b/docs/auto_examples/01_DataOperations/plot_neurovault_io_codeobj.pickle differ diff --git a/docs/auto_examples/02_Analysis/plot_decomposition_codeobj.pickle b/docs/auto_examples/02_Analysis/plot_decomposition_codeobj.pickle index 2ed98b88..f44a13cd 100644 Binary files a/docs/auto_examples/02_Analysis/plot_decomposition_codeobj.pickle and b/docs/auto_examples/02_Analysis/plot_decomposition_codeobj.pickle differ diff --git a/docs/auto_examples/02_Analysis/plot_multivariate_classification_codeobj.pickle b/docs/auto_examples/02_Analysis/plot_multivariate_classification_codeobj.pickle index cb5c5a1f..e8dffee5 100644 Binary files a/docs/auto_examples/02_Analysis/plot_multivariate_classification_codeobj.pickle and b/docs/auto_examples/02_Analysis/plot_multivariate_classification_codeobj.pickle differ diff --git a/docs/auto_examples/02_Analysis/plot_similarity_example_codeobj.pickle b/docs/auto_examples/02_Analysis/plot_similarity_example_codeobj.pickle index 465f2e72..a5e3374d 100644 Binary files a/docs/auto_examples/02_Analysis/plot_similarity_example_codeobj.pickle and b/docs/auto_examples/02_Analysis/plot_similarity_example_codeobj.pickle differ diff --git a/docs/auto_examples/02_Analysis/plot_univariate_regression_codeobj.pickle b/docs/auto_examples/02_Analysis/plot_univariate_regression_codeobj.pickle index 25181d70..92b31f04 100644 Binary files a/docs/auto_examples/02_Analysis/plot_univariate_regression_codeobj.pickle and b/docs/auto_examples/02_Analysis/plot_univariate_regression_codeobj.pickle differ diff --git a/docs/auto_examples/auto_examples_jupyter.zip b/docs/auto_examples/auto_examples_jupyter.zip index adf0a30b..48a457b7 100644 Binary files a/docs/auto_examples/auto_examples_jupyter.zip and b/docs/auto_examples/auto_examples_jupyter.zip differ diff --git a/docs/auto_examples/auto_examples_python.zip b/docs/auto_examples/auto_examples_python.zip index 6b09ddff..4862341f 100644 Binary files a/docs/auto_examples/auto_examples_python.zip and b/docs/auto_examples/auto_examples_python.zip differ diff --git a/docs/auto_examples/index.rst b/docs/auto_examples/index.rst index 8b1af970..5ff409e2 100644 --- a/docs/auto_examples/index.rst +++ b/docs/auto_examples/index.rst @@ -235,13 +235,13 @@ Neuroimaging Analysis Examples .. 
container:: sphx-glr-download - :download:`Download all examples in Python source code: auto_examples_python.zip ` + :download:`Download all examples in Python source code: auto_examples_python.zip ` .. container:: sphx-glr-download - :download:`Download all examples in Jupyter notebooks: auto_examples_jupyter.zip ` + :download:`Download all examples in Jupyter notebooks: auto_examples_jupyter.zip ` .. only:: html diff --git a/docs/backreferences/nltools.analysis.Roc.calculate.examples b/docs/backreferences/nltools.analysis.Roc.calculate.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.analysis.Roc.examples b/docs/backreferences/nltools.analysis.Roc.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.analysis.Roc.plot.examples b/docs/backreferences/nltools.analysis.Roc.plot.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.analysis.Roc.summary.examples b/docs/backreferences/nltools.analysis.Roc.summary.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.cross_validation.KFoldStratified.examples b/docs/backreferences/nltools.cross_validation.KFoldStratified.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.cross_validation.KFoldStratified.split.examples b/docs/backreferences/nltools.cross_validation.KFoldStratified.split.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.cross_validation.examples b/docs/backreferences/nltools.cross_validation.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.cross_validation.set_cv.examples b/docs/backreferences/nltools.cross_validation.set_cv.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.append.examples b/docs/backreferences/nltools.data.Adjacency.append.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.bootstrap.examples b/docs/backreferences/nltools.data.Adjacency.bootstrap.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.copy.examples b/docs/backreferences/nltools.data.Adjacency.copy.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.distance.examples b/docs/backreferences/nltools.data.Adjacency.distance.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.examples b/docs/backreferences/nltools.data.Adjacency.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.isempty.examples b/docs/backreferences/nltools.data.Adjacency.isempty.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.mean.examples b/docs/backreferences/nltools.data.Adjacency.mean.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.plot.examples b/docs/backreferences/nltools.data.Adjacency.plot.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.plot_label_distance.examples b/docs/backreferences/nltools.data.Adjacency.plot_label_distance.examples deleted file mode 100644 index 
e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.plot_silhouette.examples b/docs/backreferences/nltools.data.Adjacency.plot_silhouette.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.shape.examples b/docs/backreferences/nltools.data.Adjacency.shape.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.similarity.examples b/docs/backreferences/nltools.data.Adjacency.similarity.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.square_shape.examples b/docs/backreferences/nltools.data.Adjacency.square_shape.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.squareform.examples b/docs/backreferences/nltools.data.Adjacency.squareform.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.stats_label_distance.examples b/docs/backreferences/nltools.data.Adjacency.stats_label_distance.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.std.examples b/docs/backreferences/nltools.data.Adjacency.std.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.threshold.examples b/docs/backreferences/nltools.data.Adjacency.threshold.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.to_graph.examples b/docs/backreferences/nltools.data.Adjacency.to_graph.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.ttest.examples b/docs/backreferences/nltools.data.Adjacency.ttest.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Adjacency.write.examples b/docs/backreferences/nltools.data.Adjacency.write.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.aggregate.examples b/docs/backreferences/nltools.data.Brain_Data.aggregate.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.align.examples b/docs/backreferences/nltools.data.Brain_Data.align.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.append.examples b/docs/backreferences/nltools.data.Brain_Data.append.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.apply_mask.examples b/docs/backreferences/nltools.data.Brain_Data.apply_mask.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.astype.examples b/docs/backreferences/nltools.data.Brain_Data.astype.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.bootstrap.examples b/docs/backreferences/nltools.data.Brain_Data.bootstrap.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.copy.examples b/docs/backreferences/nltools.data.Brain_Data.copy.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.decompose.examples b/docs/backreferences/nltools.data.Brain_Data.decompose.examples 
deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.detrend.examples b/docs/backreferences/nltools.data.Brain_Data.detrend.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.distance.examples b/docs/backreferences/nltools.data.Brain_Data.distance.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.dtype.examples b/docs/backreferences/nltools.data.Brain_Data.dtype.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.empty.examples b/docs/backreferences/nltools.data.Brain_Data.empty.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.examples b/docs/backreferences/nltools.data.Brain_Data.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.extract_roi.examples b/docs/backreferences/nltools.data.Brain_Data.extract_roi.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.filter.examples b/docs/backreferences/nltools.data.Brain_Data.filter.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.groupby.examples b/docs/backreferences/nltools.data.Brain_Data.groupby.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.icc.examples b/docs/backreferences/nltools.data.Brain_Data.icc.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.isempty.examples b/docs/backreferences/nltools.data.Brain_Data.isempty.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.mean.examples b/docs/backreferences/nltools.data.Brain_Data.mean.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.multivariate_similarity.examples b/docs/backreferences/nltools.data.Brain_Data.multivariate_similarity.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.plot.examples b/docs/backreferences/nltools.data.Brain_Data.plot.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.predict.examples b/docs/backreferences/nltools.data.Brain_Data.predict.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.r_to_z.examples b/docs/backreferences/nltools.data.Brain_Data.r_to_z.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.regions.examples b/docs/backreferences/nltools.data.Brain_Data.regions.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.regress.examples b/docs/backreferences/nltools.data.Brain_Data.regress.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.shape.examples b/docs/backreferences/nltools.data.Brain_Data.shape.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.similarity.examples b/docs/backreferences/nltools.data.Brain_Data.similarity.examples 
deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.standardize.examples b/docs/backreferences/nltools.data.Brain_Data.standardize.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.std.examples b/docs/backreferences/nltools.data.Brain_Data.std.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.sum.examples b/docs/backreferences/nltools.data.Brain_Data.sum.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.threshold.examples b/docs/backreferences/nltools.data.Brain_Data.threshold.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.to_nifti.examples b/docs/backreferences/nltools.data.Brain_Data.to_nifti.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.transform_pairwise.examples b/docs/backreferences/nltools.data.Brain_Data.transform_pairwise.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.ttest.examples b/docs/backreferences/nltools.data.Brain_Data.ttest.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.upload_neurovault.examples b/docs/backreferences/nltools.data.Brain_Data.upload_neurovault.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Brain_Data.write.examples b/docs/backreferences/nltools.data.Brain_Data.write.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Design_Matrix.add_dct_basis.examples b/docs/backreferences/nltools.data.Design_Matrix.add_dct_basis.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Design_Matrix.addpoly.examples b/docs/backreferences/nltools.data.Design_Matrix.addpoly.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Design_Matrix.append.examples b/docs/backreferences/nltools.data.Design_Matrix.append.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Design_Matrix.convolve.examples b/docs/backreferences/nltools.data.Design_Matrix.convolve.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Design_Matrix.downsample.examples b/docs/backreferences/nltools.data.Design_Matrix.downsample.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Design_Matrix.examples b/docs/backreferences/nltools.data.Design_Matrix.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Design_Matrix.heatmap.examples b/docs/backreferences/nltools.data.Design_Matrix.heatmap.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Design_Matrix.horzcat.examples b/docs/backreferences/nltools.data.Design_Matrix.horzcat.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Design_Matrix.info.examples b/docs/backreferences/nltools.data.Design_Matrix.info.examples deleted file mode 100644 index e69de29b..00000000 diff --git 
a/docs/backreferences/nltools.data.Design_Matrix.upsample.examples b/docs/backreferences/nltools.data.Design_Matrix.upsample.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Design_Matrix.vertcat.examples b/docs/backreferences/nltools.data.Design_Matrix.vertcat.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Design_Matrix.vif.examples b/docs/backreferences/nltools.data.Design_Matrix.vif.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Design_Matrix.zscore.examples b/docs/backreferences/nltools.data.Design_Matrix.zscore.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Groupby.apply.examples b/docs/backreferences/nltools.data.Groupby.apply.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Groupby.combine.examples b/docs/backreferences/nltools.data.Groupby.combine.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Groupby.examples b/docs/backreferences/nltools.data.Groupby.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.data.Groupby.split.examples b/docs/backreferences/nltools.data.Groupby.split.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.datasets.download_collection.examples b/docs/backreferences/nltools.datasets.download_collection.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.datasets.download_nifti.examples b/docs/backreferences/nltools.datasets.download_nifti.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.datasets.examples b/docs/backreferences/nltools.datasets.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.datasets.fetch_emotion_ratings.examples b/docs/backreferences/nltools.datasets.fetch_emotion_ratings.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.datasets.fetch_pain.examples b/docs/backreferences/nltools.datasets.fetch_pain.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.datasets.get_collection_image_metadata.examples b/docs/backreferences/nltools.datasets.get_collection_image_metadata.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.file_reader.examples b/docs/backreferences/nltools.file_reader.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.file_reader.onsets_to_dm.examples b/docs/backreferences/nltools.file_reader.onsets_to_dm.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.mask.collapse_mask.examples b/docs/backreferences/nltools.mask.collapse_mask.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.mask.create_sphere.examples b/docs/backreferences/nltools.mask.create_sphere.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.mask.examples b/docs/backreferences/nltools.mask.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.mask.expand_mask.examples 
b/docs/backreferences/nltools.mask.expand_mask.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.plotting.dist_from_hyperplane_plot.examples b/docs/backreferences/nltools.plotting.dist_from_hyperplane_plot.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.plotting.examples b/docs/backreferences/nltools.plotting.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.plotting.iBrainViewer.examples b/docs/backreferences/nltools.plotting.iBrainViewer.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.plotting.plotBrain.examples b/docs/backreferences/nltools.plotting.plotBrain.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.plotting.plotTBrain.examples b/docs/backreferences/nltools.plotting.plotTBrain.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.plotting.plot_between_label_distance.examples b/docs/backreferences/nltools.plotting.plot_between_label_distance.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.plotting.plot_mean_label_distance.examples b/docs/backreferences/nltools.plotting.plot_mean_label_distance.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.plotting.plot_silhouette.examples b/docs/backreferences/nltools.plotting.plot_silhouette.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.plotting.plot_stacked_adjacency.examples b/docs/backreferences/nltools.plotting.plot_stacked_adjacency.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.plotting.probability_plot.examples b/docs/backreferences/nltools.plotting.probability_plot.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.plotting.roc_plot.examples b/docs/backreferences/nltools.plotting.roc_plot.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.plotting.scatterplot.examples b/docs/backreferences/nltools.plotting.scatterplot.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.simulator.Simulator.create_cov_data.examples b/docs/backreferences/nltools.simulator.Simulator.create_cov_data.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.simulator.Simulator.create_data.examples b/docs/backreferences/nltools.simulator.Simulator.create_data.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.simulator.Simulator.create_ncov_data.examples b/docs/backreferences/nltools.simulator.Simulator.create_ncov_data.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.simulator.Simulator.examples b/docs/backreferences/nltools.simulator.Simulator.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.simulator.Simulator.gaussian.examples b/docs/backreferences/nltools.simulator.Simulator.gaussian.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.simulator.Simulator.n_spheres.examples b/docs/backreferences/nltools.simulator.Simulator.n_spheres.examples deleted file mode 100644 index 
e69de29b..00000000 diff --git a/docs/backreferences/nltools.simulator.Simulator.normal_noise.examples b/docs/backreferences/nltools.simulator.Simulator.normal_noise.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.simulator.Simulator.sphere.examples b/docs/backreferences/nltools.simulator.Simulator.sphere.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.simulator.Simulator.to_nifti.examples b/docs/backreferences/nltools.simulator.Simulator.to_nifti.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.simulator.examples b/docs/backreferences/nltools.simulator.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.align.examples b/docs/backreferences/nltools.stats.align.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.calc_bpm.examples b/docs/backreferences/nltools.stats.calc_bpm.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.correlation_permutation.examples b/docs/backreferences/nltools.stats.correlation_permutation.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.downsample.examples b/docs/backreferences/nltools.stats.downsample.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.examples b/docs/backreferences/nltools.stats.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.fdr.examples b/docs/backreferences/nltools.stats.fdr.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.fisher_r_to_z.examples b/docs/backreferences/nltools.stats.fisher_r_to_z.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.holm_bonf.examples b/docs/backreferences/nltools.stats.holm_bonf.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.make_cosine_basis.examples b/docs/backreferences/nltools.stats.make_cosine_basis.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.multi_threshold.examples b/docs/backreferences/nltools.stats.multi_threshold.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.one_sample_permutation.examples b/docs/backreferences/nltools.stats.one_sample_permutation.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.pearson.examples b/docs/backreferences/nltools.stats.pearson.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.procrustes.examples b/docs/backreferences/nltools.stats.procrustes.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.summarize_bootstrap.examples b/docs/backreferences/nltools.stats.summarize_bootstrap.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.threshold.examples b/docs/backreferences/nltools.stats.threshold.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.trim.examples b/docs/backreferences/nltools.stats.trim.examples deleted file mode 100644 index 
e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.two_sample_permutation.examples b/docs/backreferences/nltools.stats.two_sample_permutation.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.upsample.examples b/docs/backreferences/nltools.stats.upsample.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.winsorize.examples b/docs/backreferences/nltools.stats.winsorize.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.stats.zscore.examples b/docs/backreferences/nltools.stats.zscore.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.utils.concatenate.examples b/docs/backreferences/nltools.utils.concatenate.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.utils.examples b/docs/backreferences/nltools.utils.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.utils.get_anatomical.examples b/docs/backreferences/nltools.utils.get_anatomical.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.utils.get_resource_path.examples b/docs/backreferences/nltools.utils.get_resource_path.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.utils.glover_hrf.examples b/docs/backreferences/nltools.utils.glover_hrf.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.utils.glover_time_derivative.examples b/docs/backreferences/nltools.utils.glover_time_derivative.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.utils.set_algorithm.examples b/docs/backreferences/nltools.utils.set_algorithm.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.utils.set_decomposition_algorithm.examples b/docs/backreferences/nltools.utils.set_decomposition_algorithm.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.utils.spm_dispersion_derivative.examples b/docs/backreferences/nltools.utils.spm_dispersion_derivative.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.utils.spm_hrf.examples b/docs/backreferences/nltools.utils.spm_hrf.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/backreferences/nltools.utils.spm_time_derivative.examples b/docs/backreferences/nltools.utils.spm_time_derivative.examples deleted file mode 100644 index e69de29b..00000000 diff --git a/examples/01_DataOperations/plot_design_matrix.py b/examples/01_DataOperations/plot_design_matrix.py index c22947d9..cc4ac352 100644 --- a/examples/01_DataOperations/plot_design_matrix.py +++ b/examples/01_DataOperations/plot_design_matrix.py @@ -1,6 +1,6 @@ """ -Design Matrix Creation -====================== +Design Matrix +============== This tutorial illustrates how to use the Design_Matrix class to flexibly create design matrices that can then be used with the Brain_Data class to perform univariate regression. 
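+# A brief illustrative aside (a minimal sketch; the variable names below are purely hypothetical and not part of nltools): the tutorial now specifies timing as a sampling frequency in hertz rather than a TR in seconds, and the two are simply reciprocals of one another. +# tr_in_seconds = 2.0 +# freq_in_hertz = 1. / tr_in_seconds # 0.5 Hz +# tr_again = 1. / freq_in_hertz # back to 2.0 seconds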
@@ -17,7 +17,11 @@ from nltools.data import Design_Matrix import numpy as np +TR = 1.5 # Design Matrices take a sampling_freq argument specified in hertz, which can be computed from the TR as 1./TR + dm = Design_Matrix(np.array([ + [0,0,0,0], + [0,0,0,0], [1,0,0,0], [1,0,0,0], [0,0,0,0], @@ -28,10 +32,19 @@ [0,0,1,0], [0,0,1,0], [0,0,0,0], [0,0,0,1], - [0,0,0,1] + [0,0,0,1], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0], + [0,0,0,0] ]), - sampling_rate = 1.5, - columns=['stim_A','stim_B','stim_C','stim_D'] + sampling_freq = 1./TR, + columns=['face_A','face_B','house_A','house_B'] ) ######################################################################### # Notice how this looks exactly like a pandas dataframe. That's because design matrices are *subclasses* of dataframes with some extra attributes and methods. @@ -48,10 +61,17 @@ dm.heatmap() + ######################################################################### -# A common operation might include adding an intercept and polynomial trend terms (e.g. linear and quadtratic) as nuisance regressors. This is easy to do. Note that polynomial terms are normalized to unit variance (i.e. mean = 0, std = 1) before inclusion to keep values on approximately the same scale. +# Adding nuisance covariates +# --------------------------- +# +# Legendre Polynomials +# ******************** +# +# A common operation is adding an intercept and polynomial trend terms (e.g. linear and quadratic) as nuisance regressors. This is easy to do. Consistent with other software packages, these are orthogonal Legendre polynomials on the scale -1 to 1. -# with include_lower = True (default), 1 here means: 0-intercept, 1-linear-trend, 2-quadtratic-trend +# with include_lower = True (default), 2 here means: 0-intercept, 1-linear-trend, 2-quadratic-trend dm_with_nuissance = dm.add_poly(2,include_lower=True) dm_with_nuissance.heatmap() @@ -61,91 +81,162 @@ print(dm_with_nuissance.details()) ######################################################################### -# Polynomial variables are not the only type of nuisance covariates that can be generate for you. Design Matrix also supports the creation of discrete-cosine basis functions ala SPM. This will create a series of filters added as new columns based on a specified duration, defaulting to 180s. +# Discrete Cosine Basis Functions +# ******************************* +# +# Polynomial variables are not the only type of nuisance covariates that can be generated for you. Design Matrix also supports the creation of discrete-cosine basis functions a la SPM. This will create a series of filters added as new columns based on a specified duration, defaulting to 180s. Let's create DCT filters for 20s durations in our toy data. # Short filter duration for our simple example -dm_with_cosine = dm.add_dct_basis(duration=5) -print(dm_with_cosine.details()) +dm_with_cosine = dm.add_dct_basis(duration=20) +dm_with_cosine.heatmap() ######################################################################### -# Load and Manipulate an Onsets File -# ----------------------------------- -# -# Nltools provides basic file-reading support for 2 or 3 column formatted onset files. Users can look at the onsets_to_dm function as a template to build more complex file readers if desired or to see additional features. Nltools includes an example onsets file where each event lasted exactly 1 TR.
Lets use that to create a design matrix with an intercept and linear trend +# Design Matrix makes it easy to perform convolution and will auto-ignore all columns that are considered polynomials. The default convolution kernel is the Glover (1999) HRF parameterized by the glover_hrf implementation in nipy (see nltools.externals.hrf for details). However, any arbitrary kernel can be passed as a 1d numpy array, or multiple kernels can be passed as a 2d numpy array for highly flexible convolution across many types of data (e.g. SCR). + +dm_with_nuissance_c = dm_with_nuissance.convolve() +print(dm_with_nuissance_c.details()) +dm_with_nuissance_c.heatmap() + +######################################################################### +# Design Matrix can do many different data operations in addition to convolution, such as upsampling and downsampling to different frequencies, zscoring, etc. Check out the API documentation for how to use these methods. + +######################################################################### +# File Reading +# ------------ +# +# Creating a Design Matrix from an onsets file +# ******************************************** +# +# Nltools provides basic file-reading support for 2 or 3 column formatted onset files. Users can look at the onsets_to_dm function as a template to build more complex file readers if desired or to see additional features. Nltools includes an example onsets file where each event lasted exactly 1 TR and TR = 2s. Let's use that to create a design matrix with an intercept and linear trend from nltools.utils import get_resource_path from nltools.file_reader import onsets_to_dm from nltools.data import Design_Matrix import os +TR = 2.0 +sampling_freq = 1./TR onsetsFile = os.path.join(get_resource_path(),'onsets_example.txt') -dm = onsets_to_dm(onsetsFile, TR=2.0, runLength=160, sort=True,add_poly=1) dm.heatmap() -######################################################################### -# Design Matrix makes it easy to perform convolution and will auto-ignore all columns that are consider polynomials. By default it will use the one-parameter glover_hrf kernel (see nipy for details). However, any kernel can be passed as an argument, including a list of different kernels for highly flexible convolution across many types of data (e.g. SCR). -dm = dm.convolve() -print(dm.details()) +dm = onsets_to_dm(onsetsFile, sampling_freq=sampling_freq, run_length=160, sort=True,add_poly=1) dm.heatmap() ######################################################################### -# Load and Z-score a Covariates File -# ---------------------------------- -# -# Now we're going to handle a covariates file that's been generated by a preprocessing routine. First we'll read in the text file using pandas and convert it to a design matrix. +# Creating a Design Matrix from a generic csv file +# ************************************************ +# +# Alternatively you can read a generic csv file and transform it into a Design Matrix using pandas' file reading capability. Here we'll read in an example covariates file that contains the output of motion realignment estimated during an fMRI preprocessing pipeline.
import pandas as pd covariatesFile = os.path.join(get_resource_path(),'covariates_example.csv') cov = pd.read_csv(covariatesFile) -cov = Design_Matrix(cov, sampling_rate = 2.0) -# Design matrix uses seaborn's heatmap for plotting so excepts all keyword arguments -# We're just adjusting colors here to visualize things a bit more nicely -cov.heatmap(vmin=-1,vmax=1) +cov = Design_Matrix(cov, sampling_freq=sampling_freq) +cov.heatmap(vmin=-1,vmax=1) # alter plot to the scale of the covs; heatmap takes Seaborn heatmap arguments ######################################################################### -# Similar to adding polynomial terms, Design Matrix has multiple methods for data processing and transformation such as downsampling, upsampling, and z-scoring. Let's use the z-score method to normalize the covariates we just loaded. +# Working with multiple Design Matrices +# ------------------------------------- +# +# Vertically "stacking" Design Matrices +# ************************************* +# A common task is creating a separate design matrix for each of multiple runs of an experiment (or multiple subjects) and vertically appending them to each other so that regression can be performed across all runs. However, in order to account for run differences it's important (and common practice) to include separate run-wise polynomials (e.g. intercepts). Design Matrix's append method is intelligent and flexible enough to keep columns separated during appending automatically. -# Use pandas built-in fillna to fill NaNs in the covariates files introduced during the pre-processing pipeline, before z-scoring -# Z-score takes an optional argument of which columns to z-score. Since we don't want to z-score any spikes, so let's select everything except that column -cov = cov.fillna(0).zscore(cov.columns[:-1]) -cov.heatmap(vmin=-1,vmax=1) +# Let's use the design matrix with polynomials from above +# Stack "run 1" on top of "run 2" +runs_1_and_2 = dm_with_nuissance.append(dm_with_nuissance,axis=0) +runs_1_and_2.heatmap() ######################################################################### -# Concatenate Multiple Design Matrices -# ------------------------------------ -# -# A really nice feature of Design Matrix is simplified, but intelligent matrix concatentation. Here it's trivial to horizontally concatenate our convolved onsets and covariates, while keeping our column names and order. +# Separating columns during append operations +# ******************************************* +# Notice that all polynomials have been kept separated for you automatically and have been renamed to reflect the fact that they come from different runs. But Design Matrix is even more flexible. Let's say you want to estimate separate run-wise coefficients for all house stimuli too. Simply pass that into the `unique_cols` parameter of append. -full = dm.append(cov,axis=1) -full.heatmap(vmin=-1,vmax=1) +runs_1_and_2 = dm_with_nuissance.append(dm_with_nuissance,unique_cols=['house*'],axis=0) +runs_1_and_2.heatmap() ######################################################################### -# But we can also intelligently vertically concatenate design matrices to handle say, different experimental runs, or participants. The method enables the user to indicate which columns to keep separated (if any) during concatenation or which to treat as extensions along the first dimension. By default the class will keep all polylnomial terms separated.
This is extremely useful when building 1 large design matrix composed of several runs or participants with separate means. - -dm2 = dm.append(dm, axis=0) -dm2.heatmap(vmin=-1,vmax=1) +# Now notice how all stimuli that begin with 'house' have been made into separate columns for each run. In general `unique_cols` can take a list of columns to keep separated or simple wildcards that either begin with a term (e.g. "house*") or end with one (e.g. "*house"). ######################################################################### -# Specific columns of interest can also be kept separate during concatenation (e.g. keeping run-wise spikes separate). As an example, we treat our first experimental regressor as different across our two design matrices. Notice that the class also preserves (as best as possible) column ordering. +# Putting it all together +# ----------------------- +# +# A realistic workflow +# ******************** +# Let's combine all the examples above to build a workflow for a realistic first-level fMRI analysis. This will include loading onsets from multiple experimental runs and concatenating them into a large multi-run design matrix in which we estimate a single set of coefficients for our variables of interest, while making sure we account for run-wise differences in nuisance covariates (e.g. motion), baselines, and trends. For simplicity we'll just reuse the same onsets and covariates file multiple times. -dm2 = dm.append(dm, axis=0, unique_cols=['BillyRiggins']) -dm2.heatmap(vmin=-1,vmax=1) +num_runs = 4 +TR = 2.0 +sampling_freq = 1./TR +all_runs = Design_Matrix(sampling_freq = sampling_freq) +for i in range(num_runs): ######################################################################### -# Design Matrix can also create polynomial terms and intelligently keep them separate during concatenation. For example lets concatenate 4 design matrices and create separate 2nd order polynomials for all of them + # 1) Load in onsets for this run + onsetsFile = os.path.join(get_resource_path(),'onsets_example.txt') + dm = onsets_to_dm(onsetsFile, sampling_freq=sampling_freq,run_length=160,sort=True) + + # 2) Convolve them with the hrf + dm = dm.convolve() + + # 3) Load in covariates for this run + covariatesFile = os.path.join(get_resource_path(),'covariates_example.csv') + cov = pd.read_csv(covariatesFile) + cov = Design_Matrix(cov, sampling_freq = sampling_freq) + + # 4) In the covariates, fill any NaNs with 0, then add an intercept, a linear trend, and dct basis functions + cov = cov.fillna(0) + + # Retain a list of nuisance covariates (e.g. motion and spikes) which we'll also want to keep separate for each run + cov_columns = cov.columns + cov = cov.add_poly(1).add_dct_basis() + + # 5) Join the onsets and covariates together + full = dm.append(cov,axis=1) -# Notice that append can take a list of Design Matrices in addition to just a single one -dm_all = dm.append([dm,dm,dm], axis=0, add_poly=2) -dm_all.heatmap(vmin=-1,vmax=1) + # 6) Append it to the master Design Matrix keeping things separated by run + all_runs = all_runs.append(full,axis=0,unique_cols=cov.columns) + +all_runs.heatmap(vmin=-1,vmax=1) + +######################################################################### +# We can see that the leftmost columns of our multi-run design matrix contain our conditions of interest (stacked across all runs), the middle columns include separate run-wise nuisance covariates (motion, spikes), and the rightmost columns contain run-specific polynomials (intercept, trends, etc).
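+######################################################################### +# As an optional sanity check (a minimal sketch, relying on the run-prefix naming convention that append uses for separated columns, e.g. '0_poly_0' for run 0, visible in the class code further below), we can count how many columns were kept separate for each run: + +print(all_runs.details()) +for run in range(num_runs): + run_cols = [c for c in all_runs.columns if c.startswith('{}_'.format(run))] + print('Run {}: {} run-specific columns'.format(run, len(run_cols)))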
######################################################################### # Data Diagnostics # ---------------- # -# Design Matrix also provides a few tools for cleaning up perfectly correlated columns (resulting in failure if trying to perform regression), replacing data, and computing collinearity. +# Let's actually check whether our design is estimable. Design Matrix provides a few tools for cleaning up highly correlated columns (which can cause regression estimation to fail), replacing data, and computing collinearity. By default the `clean` method will drop any columns correlated at r >= .95 + +all_runs_cleaned = all_runs.clean(verbose=True) +all_runs_cleaned.heatmap(vmin=-1,vmax=1) + +######################################################################### +# Whoops, it looks like some of our polynomials and dct basis functions above are highly correlated, but the clean method detected that and dropped them for us. In practice you'll often include either polynomials or dct basis functions rather than both, but this was just an illustrative example. + +######################################################################### +# Estimating a first-level model +# ------------------------------ +# +# You can now set this multi-run Design Matrix as the `X` attribute of a Brain_Data object containing EPI data for these four runs and estimate a regression in just a few lines of code. + +# This code is commented because we don't actually have niftis loaded for the purposes of this tutorial +# See the other tutorials for more details on working with nifti files and Brain_Data objects + +# Assuming you already loaded up Nifti images like this +# list_of_niftis = ['run_1.nii.gz','run_2.nii.gz','run_3.nii.gz','run_4.nii.gz'] +# all_run_data = Brain_Data(list_of_niftis) + +# Set our Design Matrix to the X attribute of Brain_Data object +# all_run_data.X = all_runs_cleaned + +# Run the regression +# results = all_run_data.regress() -# We have a good design here so no problems -dm_all.clean(verbose=False) -dm_all.vif() +# This will produce N beta, t, and p images +# where N is the number of columns in the design matrix diff --git a/nltools/data/brain_data.py b/nltools/data/brain_data.py index 513854b9..4ddcc312 100644 --- a/nltools/data/brain_data.py +++ b/nltools/data/brain_data.py @@ -327,6 +327,19 @@ def write(self, file_name=None): self.to_nifti().to_filename(file_name) + def scale(self, scale_val=100.): + """ Scale all values such that they are on the range 0 - scale_val, via grand-mean scaling. This is NOT global-scaling/intensity normalization. This is useful for ensuring that data is on a common scale (e.g. good for multiple runs, participants, etc) and if the default value of 100 is used, can be interpreted as something akin to (but not exactly) "percent signal change." This is consistent with default behavior in AFNI and SPM. Change this value to 10000 to be consistent with FSL. + + Args: + scale_val (int/float): what value to send the grand-mean to; default 100 + + """ + + out = deepcopy(self) + out.data = out.data / out.data.mean() * scale_val + + return out + def plot(self, limit=5, anatomical=None, **kwargs): """ Create a quick plot of self.data.
Will plot each image separately @@ -399,18 +412,23 @@ def regress(self,mode='ols',**kwargs): b,t,p,df,res = regress(self.X,self.data,mode=mode,**kwargs) sigma = np.std(res,axis=0,ddof=self.X.shape[1]) - b_out = deepcopy(self) - b_out.data = b - t_out = deepcopy(self) + # Avoid copying all of the data in self multiple times; instead start with an empty instance, copy over only the needed attributes from self, and use this as a template for the other outputs + b_out = self.__class__() + b_out.mask = deepcopy(self.mask) + b_out.nifti_masker = deepcopy(self.nifti_masker) + + # Use this as a template for the other outputs before setting data + t_out = b_out.copy() t_out.data = t - p_out = deepcopy(self) + p_out = b_out.copy() p_out.data = p - df_out = deepcopy(self) + df_out = b_out.copy() df_out.data = df - sigma_out = deepcopy(self) + sigma_out = b_out.copy() sigma_out.data = sigma - res_out = deepcopy(self) + res_out = b_out.copy() res_out.data = res + b_out.data = b return {'beta': b_out, 't': t_out, 'p': p_out, 'df': df_out, 'sigma': sigma_out, 'residual': res_out} @@ -497,11 +515,12 @@ def ttest(self, threshold_dict=None): return out - def append(self, data): + def append(self, data, **kwargs): """ Append data to Brain_Data instance Args: data: Brain_Data instance to append + kwargs: optional inputs to Design_Matrix append Returns: out: new appended Brain_Data instance @@ -532,7 +551,7 @@ def append(self, data): out.Y = self.Y.append(data.Y) if self.X.size: if isinstance(self.X, pd.DataFrame): - out.X = self.X.append(data.X) + out.X = self.X.append(data.X,**kwargs) else: out.X = np.vstack([self.X, data.X]) return out @@ -1155,11 +1174,11 @@ def r_to_z(self): out.data = fisher_r_to_z(out.data) return out - def filter(self,sampling_rate=None, high_pass=None,low_pass=None,**kwargs): + def filter(self,sampling_freq=None, high_pass=None,low_pass=None,**kwargs): ''' Apply 5th order butterworth filter to data. Wraps nilearn functionality. Does not default to detrending and standardizing like nilearn implementation, but this can be overridden using kwargs. Args: - sampling_rate: sampling rate in seconds (i.e. TR) + sampling_freq: sampling frequency in hertz (i.e. 1 / TR) high_pass: high pass cutoff frequency low_pass: low pass cutoff frequency kwargs: other keyword arguments to nilearn.signal.clean @@ -1168,18 +1187,18 @@ def filter(self,sampling_rate=None, high_pass=None,low_pass=None,**kwargs): Brain_Data: Filtered Brain_Data instance ''' - if sampling_rate is None: + if sampling_freq is None: raise ValueError("Need to provide sampling rate (TR)!") if high_pass is None and low_pass is None: raise ValueError("high_pass and/or low_pass cutoff must be" "provided!") - if sampling_rate is None: + if sampling_freq is None: raise ValueError("Need to provide TR!") standardize = kwargs.get('standardize',False) detrend = kwargs.get('detrend',False) out = self.copy() - out.data = clean(out.data,t_r=sampling_rate,detrend=detrend,standardize=standardize,high_pass=high_pass,low_pass=low_pass,**kwargs) + out.data = clean(out.data,t_r=1.
/sampling_freq,detrend=detrend,standardize=standardize,high_pass=high_pass,low_pass=low_pass,**kwargs) return out def dtype(self): diff --git a/nltools/data/design_matrix.py b/nltools/data/design_matrix.py index 2abe0928..3a513ada 100644 --- a/nltools/data/design_matrix.py +++ b/nltools/data/design_matrix.py @@ -17,6 +17,7 @@ import seaborn as sns import matplotlib.pyplot as plt from scipy.stats import pearsonr +from scipy.special import legendre import six from ..external.hrf import glover_hrf from nltools.stats import (downsample, @@ -24,6 +25,7 @@ zscore, make_cosine_basis ) +from nltools.utils import AmbiguityError class Design_Matrix_Series(Series): @@ -45,33 +47,31 @@ def _constructor_expanddim(self): class Design_Matrix(DataFrame): - """Design_Matrix is a class to represent design matrices with convenience - functionality for convolution, upsampling and downsampling. It plays - nicely with Brain_Data and can be used to build an experimental design - to pass to Brain_Data's X attribute. It is essentially an enhanced - pandas df, with extra attributes and methods. Methods always return a - new design matrix instance. + """Design_Matrix is a class to represent design matrices with special methods for data processing (e.g. convolution, upsampling, downsampling) and intelligent, flexible appending (e.g. automatically keeping certain columns or polynomial terms separated during concatenation). It plays nicely with Brain_Data and can be used to build an experimental design to pass to Brain_Data's X attribute. It is essentially an enhanced pandas df, with extra attributes and methods. Methods always return a new design matrix instance (copy). Column names are always string types. Args: - sampling_rate (float): sampling rate of each row in seconds (e.g. TR in neuroimaging) + sampling_freq (float): sampling rate of each row in hertz; to convert seconds to hertz (e.g. in the case of TRs for neuroimaging) use hertz = 1 / TR convolved (list, optional): on what columns convolution has been performed; defaults to None polys (list, optional): list of polynomial terms in design matrix, e.g. intercept, polynomial trends, basis functions, etc; default None - """ - _metadata = ['sampling_rate', 'convolved', 'polys'] + _metadata = ['sampling_freq', 'convolved', 'polys', 'multi'] def __init__(self, *args, **kwargs): - sampling_rate = kwargs.pop('sampling_rate',None) + sampling_freq = kwargs.pop('sampling_freq',None) convolved = kwargs.pop('convolved', []) polys = kwargs.pop('polys', []) - self.sampling_rate = sampling_rate + self.sampling_freq = sampling_freq self.convolved = convolved self.polys = polys + self.multi = False super(Design_Matrix, self).__init__(*args, **kwargs) + # Ensure that column names are string types so that all methods work + if not self.empty: + self.columns = [str(elem) for elem in self.columns] @property def _constructor(self): @@ -84,12 +84,13 @@ def _constructor_sliced(self): def _inherit_attributes(self, dm_out, atts=[ - 'sampling_rate', + 'sampling_freq', 'convolved', - 'polys']): + 'polys', + 'multi']): """ - This is helper function that simply ensures that attributes are copied over from an the current Design_Matrix to a new Design_Matrix. + This is a helper function that simply ensures that attributes are copied over from the current Design_Matrix to a new Design_Matrix.
Args: dm_out (Design_Matrix): the new design matrix to copy attributes to @@ -104,23 +105,30 @@ def _inherit_attributes(self, setattr(dm_out, item, getattr(self,item)) return dm_out + def _sort_cols(self): + """ + This is a helper function that tries to ensure that columns of a Design Matrix are sorted according to: a) those not separated during append operations, b) those separated during append operations, c) polynomials. Called primarily during vertical concatenation and cleaning. + """ + data_cols = [elem for elem in self.columns if not elem.split('_')[0].isdigit() and elem not in self.polys] + separated_cols = [elem for elem in self.columns if elem.split('_')[0].isdigit() and elem not in self.polys] + return self[data_cols + separated_cols + self.polys] + def details(self): """Print class meta data. """ - return '%s.%s(sampling_rate=%s, shape=%s, convolved=%s, polynomials=%s)' % ( + return '%s.%s(sampling_freq=%s (hz), shape=%s, multi=%s, convolved=%s, polynomials=%s)' % ( self.__class__.__module__, self.__class__.__name__, - self.sampling_rate, + self.sampling_freq, self.shape, + self.multi, self.convolved, self.polys ) - def append(self, dm, axis=0, keep_separate = True, add_poly = None, add_dct_basis = None, unique_cols = [], include_lower = True,fill_na=0): - """Method for concatenating another design matrix row or column-wise. - Can "uniquify" certain columns when appending row-wise, and by - default will attempt to do that with all polynomial terms (e.g. intercept, polynomial trends). Can also add new polynomial terms during vertical concatentation (when axis == 0). This will by default create new polynomial terms separately for each design matrix + def append(self, dm, axis=0, keep_separate = True, unique_cols = [], fill_na=0, verbose=False): + """Method for concatenating another design matrix row or column-wise. When concatenating row-wise, has the ability to keep certain columns separated if they exist in multiple design matrices (e.g. keeping separate intercepts for multiple runs). This is on by default and will automatically separate out polynomial columns (i.e. anything added with the `add_poly` or `add_dct_basis` methods). Additional columns can be separated by run using the `unique_cols` parameter. Can also add new polynomial terms during vertical concatenation (when axis == 0).
This will by default create new polynomial terms separately for each design matrix Args: dm (Design_Matrix or list): design_matrix or list of design_matrices to append @@ -128,14 +136,11 @@ def append(self, dm, axis=0, keep_separate = True, add_poly = None, add_dct_basi keep_separate (bool,optional): whether try and uniquify columns; defaults to True; only applies when axis==0 - add_poly (int,optional): what order polynomial terms to add during append, only applied when axis = 0; default None - add_dct_basis (int,optional): add discrete cosine bassi function during append, only applied when axis = 0; default None - only applies when axis==0 unique_cols (list,optional): what additional columns to try to keep separated by uniquifying, only applies when axis = 0; defaults to None - include_lower (bool,optional): whether to also add lower order polynomial terms; only applies when add_poly is not None fill_na (str/int/float): if provided will fill NaNs with this value during row-wise appending (when axis = 0) if separate columns are desired; default 0 + verbose (bool): print messages during append about how polynomials are going to be separated """ if not isinstance(dm, list): @@ -145,25 +150,23 @@ def append(self, dm, axis=0, keep_separate = True, add_poly = None, add_dct_basi # Check all items to be appended are Design Matrices and have the same sampling rate if not all([isinstance(elem,self.__class__) for elem in to_append]): - raise TypeError("Each object in list must be a Design_Matrix!") - if not all([elem.sampling_rate == self.sampling_rate for elem in to_append]): - raise ValueError("All Design Matrices must have the same sampling rate!") + raise TypeError("Each object to be appended must be a Design_Matrix!") + if not all([elem.sampling_freq == self.sampling_freq for elem in to_append]): + raise ValueError("All Design Matrices must have the same sampling frequency!") if axis == 1: if any([not set(self.columns).isdisjoint(elem.columns) for elem in to_append]): print("Duplicate column names detected. Will be repeated.") - if add_poly or unique_cols: - print("add_poly and unique_cols only apply when axis=0...ignoring") - return self._horzcat(to_append) + return self._horzcat(to_append,fill_na=fill_na) elif axis == 0: - return self._vertcat(to_append, keep_separate=keep_separate,add_poly=add_poly,add_dct_basis=add_dct_basis,unique_cols=unique_cols,include_lower=include_lower,fill_na=fill_na) + return self._vertcat(to_append, keep_separate=keep_separate,unique_cols=unique_cols,fill_na=fill_na,verbose=verbose) else: raise ValueError("Axis must be 0 (row) or 1 (column)") - def _horzcat(self, to_append): + def _horzcat(self, to_append,fill_na): """Used by .append(). Append another design matrix, column-wise (horz cat). Always returns a new design_matrix. @@ -175,105 +178,287 @@ def _horzcat(self, to_append): out.polys = self.polys[:] for elem in to_append: out.polys += elem.polys + if fill_na is not None: + out = out.fillna(fill_na) else: raise ValueError("All Design Matrices must have the same number of rows!") return out - def _vertcat(self, df, keep_separate, add_poly, add_dct_basis, unique_cols, include_lower,fill_na): + def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose): """Used by .append(). Append another design matrix row-wise (vert cat). Always returns a new design matrix. 
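A hedged sketch of what these new append semantics mean in practice (run count and column names invented for illustration): keep_separate renames each matrix's polynomial terms with a numeric prefix, and a wildcard in unique_cols extends that per-run separation to matching stimulus columns.

```python
import numpy as np
from nltools.data import Design_Matrix

run = Design_Matrix(np.random.randint(2, size=(10, 2)),
                    columns=['stim_A', 'stim_B'],
                    sampling_freq=.5).add_poly(1)

# Polynomials become '0_poly_*' and '1_poly_*'; the stim columns are
# likewise split per run because of the 'stim*' wildcard
both = run.append(run, unique_cols=['stim*'])
assert both.shape == (20, 8)  # 2 runs x (2 stim + 2 poly) columns
```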
""" - if unique_cols: + # make a copy of the dms to append + to_append = df[:] + orig = self.copy() # Make a copy of the original cause we might alter it + + # In order to append while keeping things separated we're going to create a new list of dataframes to append with renamed columns + modify_to_append = [] + all_polys = [] + cols_to_separate = [] + all_separated = [] + + if len(unique_cols): if not keep_separate: raise ValueError("unique_cols provided but keep_separate set to False. Set keep_separate to True to separate unique_cols") - to_append = df[:] # need to make a copy because we're altering df - + # 1) Make sure unique_cols are in original Design Matrix + if not self.empty: + to_rename = {} + unique_count = [] + for u in unique_cols: + if u.endswith('*'): + searchstr = u.split('*')[0] + elif u.startswith('*'): + searchstr = u.split('*')[1] + else: + searchstr = u + if not any([searchstr in elem for elem in self.columns]): + raise ValueError("'{}' not present in any column name of original Design Matrix".format(searchstr)) + # 2) Prepend them with a 0_ if this dm has never been appended to be for otherwise grab their current prepended index are and start a unique_cols counter + else: + for c in self.columns: + if searchstr in c: + if self.multi and c[0].isdigit(): + count = c.split('_')[0] + unique_count.append(int(count)) + else: + new_name = '0_' + c + all_separated.append(new_name) + to_rename[c] = new_name + all_separated.append(new_name) + cols_to_separate.append(searchstr) + + if to_rename: + orig = orig.rename(columns=to_rename) + max_unique_count = 0 + else: + max_unique_count = np.array(unique_count).max() + + # 3) Handle several different cases: + # a) original has no polys, dms to append do + # b) original has no polys, dms to append dont + # c) original has polys, dms to append do + # d) original has polys, dms to append dont + # Within each of these also keep a counter, update, and check for unique cols if needed + # This unique_col checking code is uglyly repeated in each conditional branch of a-d, but differs in subtle ways; probably could be cleaned up in a refactor if keep_separate: - if not all([set(self.polys) == set(elem.polys) for elem in to_append]): - raise ValueError("Design matrices do not match on their polynomial terms (i.e. intercepts, polynomial trends, basis functions). This makes appending with separation ambigious and is not currently supported. Either make sure all constant terms are the same or make sure no Design Matrix has any constant terms and add them during appending with the 'add_poly' and 'unique_cols' arguments") - - orig = self.copy() # Make a copy of the original cause we might alter it - - if add_poly: - orig = orig.add_poly(add_poly,include_lower) - for i,d in enumerate(to_append): - d = d.add_poly(add_poly,include_lower) - to_append[i] = d - - if add_dct_basis: - orig = orig.add_dct_basis(add_dct_basis) - for i,d in enumerate(to_append): - d = d.add_dct_basis(add_dct_basis) - to_append[i] = d - - all_cols = unique_cols + orig.polys - all_dms = [orig] + to_append - all_polys = [] - is_data = [] - for i,dm in enumerate(all_dms): - # Figure out what columns we need to relabel - cols_to_relabel = [col for col in dm.columns if col in all_cols] - if cols_to_relabel: - # Create a dictionary with the new names, e.g. 
{'intercept': '0_intercept'} - cols_dict = {} - # Rename the columns and update the dm - for c in cols_to_relabel: - cols_dict[c] = str(i) + '_' + c - if c not in unique_cols: - all_polys.append(cols_dict[c]) - dm = dm.rename(columns=cols_dict) - all_dms[i] = dm - - out = pd.concat(all_dms,axis=0,ignore_index=True) - if fill_na is not None: - out = out.fillna(fill_na) + if not len(self.polys): + # Self no polys; append has polys. + if any([len(elem.polys) for elem in to_append]): + if verbose: + print("Keep separate requested but original Design Matrix has no polynomial terms but matrices to be appended do. Inherting appended Design Matrices' polynomials...") + for i,dm in enumerate(to_append): + for p in dm.polys: + all_polys.append(p) + + # Handle renaming additional unique cols to keep separate + if cols_to_separate: + if verbose: + print("Unique cols requested. Trying to keep {} separated".format(cols_to_separate)) + to_rename = {} + data_cols = dm.drop(dm.polys,axis=1).columns + print(data_cols) + for u in cols_to_separate: + for c in data_cols: + if u in c: + if dm.multi: + count = int(c.split('_')[0]) + name = '_'.join(c.split('_')[1:]) + count += max_unique_count + 1 + new_name = str(count) + '_' + name + to_rename[c] = new_name + else: + new_name = str(max_unique_count + 1) + '_' + c + to_rename[c] = new_name + all_separated.append(new_name) + modify_to_append.append(dm.rename(columns=to_rename)) + max_unique_count += 1 + else: + modify_to_append.append(dm) + else: + # Self no polys; append no polys + if verbose: + print("Keep separate requested but neither original Design Matrix nor matrices to be appended have any polynomial terms Ignoring...") + # Handle renaming additional unique cols to keep separate + for i,dm in enumerate(to_append): + if cols_to_separate: + if verbose: + print("Unique cols requested. Trying to keep {} separated".format(cols_to_separate)) + to_rename = {} + data_cols = dm.drop(dm.polys,axis=1).columns + for u in cols_to_separate: + for c in data_cols: + if u in c: + if dm.multi: + count = int(c.split('_')[0]) + name = '_'.join(c.split('_')[1:]) + count += max_unique_count + 1 + new_name = str(count) + '_' + name + to_rename[c] = new_name + else: + new_name = str(max_unique_count + 1) + '_' + c + to_rename[c] = new_name + all_separated.append(new_name) + modify_to_append.append(dm.rename(columns=to_rename)) + max_unique_count += 1 + else: + modify_to_append.append(dm) + else: + # Self has polys; append has polys + if any([len(elem.polys) for elem in to_append]): + if verbose: + print("Keep separate requested and both original Design Matrix and matrices to be appended have polynomial terms. Separating...") + # Get the unique polynomials that currently exist + # [name, count/None, isRoot] + current_polys = [] + for p in self.polys: + if p.count('_') == 2: + isRoot = False + pSplit = p.split('_') + pName = '_'.join(pSplit[1:]) + pCount = int(pSplit[0]) + else: + isRoot = True + pName = p + pCount = 0 + current_polys.append([pName,pCount,isRoot]) + + # Mixed type numpy array to make things a little easier + current_polys = pd.DataFrame(current_polys).values + + # If current polynomials dont begin with a prepended numerical identifier, created one, e.g. 
0_poly_1 + if any(current_polys[:,2]): + renamed_polys = {} + for i in range(current_polys.shape[0]): + renamed_polys[current_polys[i,0]] = str(current_polys[i,1]) + '_' + current_polys[i,0] + orig = orig.rename(columns = renamed_polys) + all_polys += list(renamed_polys.values()) + else: + all_polys += self.polys + + current_poly_max = current_polys[:,1].max() + + for i,dm in enumerate(to_append): + to_rename = {} + for p in dm.polys: + if p.count('_') == 2: + pSplit = p.split('_') + pName = '_'.join(pSplit[1:]) + pCount = int(pSplit[0]) + current_poly_max + 1 + else: + pName = p + pCount = current_poly_max + 1 + to_rename[p] = str(pCount) + '_' + pName + temp_dm = dm.rename(columns = to_rename) + current_poly_max += 1 + all_polys += list(to_rename.values()) + + # Handle renaming additional unique cols to keep separate + if cols_to_separate: + if verbose: + print("Unique cols requested. Trying to keep {} separated".format(cols_to_separate)) + to_rename = {} + data_cols = dm.drop(dm.polys,axis=1).columns + for u in cols_to_separate: + for c in data_cols: + if u in c: + if dm.multi: + count = int(c.split('_')[0]) + name = '_'.join(c.split('_')[1:]) + count += max_unique_count + 1 + new_name = str(count) + '_' + name + to_rename[c] = new_name + else: + new_name = str(max_unique_count + 1) + '_' + c + to_rename[c] = new_name + all_separated.append(new_name) + + # Combine renamed polynomials and renamed uniqu_cols + modify_to_append.append(temp_dm.rename(columns=to_rename)) + max_unique_count += 1 + else: + modify_to_append.append(temp_dm) + else: + # Self has polys; append no polys + if verbose: + print("Keep separate requested but only original Design Matrix has polynomial terms. Retaining original Design Matrix's polynomials only...") + all_polys += self.polys + + # Handle renaming additional unique cols to keep separate + if cols_to_separate: + if verbose: + print("Unique cols requested. Trying to keep {} separated".format(cols_to_separate)) + for i,dm in enumerate(to_append): + to_rename = {} + data_cols = dm.drop(dm.polys,axis=1).columns + for u in cols_to_separate: + for c in data_cols: + if u in c: + if dm.multi: + count = int(c.split('_')[0]) + name = '_'.join(c.split('_')[1:]) + count += max_unique_count + 1 + new_name = str(count) + '_' + name + to_rename[c] = new_name + else: + new_name = str(max_unique_count + 1) + '_' + c + to_rename[c] = new_name + all_separated.append(new_name) + modify_to_append.append(dm.rename(to_rename)) + max_unique_count += 1 + else: + modify_to_append.append(dm) - out.sampling_rate = self.sampling_rate - out.convolved = self.convolved - out.polys = all_polys - data_cols = [elem for elem in out.columns if elem not in out.polys] - out = out[data_cols + out.polys] - else: - out = pd.concat([self] + to_append,axis=0,ignore_index=True) - out = self._inherit_attributes(out) - if add_poly: - out = out.add_poly(add_poly,include_lower) + # Combine original dm with the updated/renamed dms to be appended + all_dms = [orig] + modify_to_append - return out + out = pd.concat(all_dms,axis=0,ignore_index=True) + + if fill_na is not None: + out = out.fillna(fill_na) + + out.sampling_freq = self.sampling_freq + out.convolved = self.convolved + out.multi = True + out.polys = all_polys - def vif(self): + return out._sort_cols() + + def vif(self,exclude_polys=True): """Compute variance inflation factor amongst columns of design matrix, - ignoring the intercept. Much faster that statsmodels and more + ignoring polynomial terms. 
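Assuming the signature above, vif's new default might be exercised like this (random data, purely illustrative):

```python
import numpy as np
from nltools.data import Design_Matrix

dm = Design_Matrix(np.random.randn(100, 3), columns=['a', 'b', 'c'],
                   sampling_freq=.5).add_poly(2).add_dct_basis()

vifs = dm.vif()                         # polynomial/basis terms dropped first
all_vifs = dm.vif(exclude_polys=False)  # keeps them, but still drops 'poly_0'
```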
Much faster than statsmodels and more reliable too. Uses the same method as Matlab and R (diagonal elements of the inverted correlation matrix). Returns: vifs (list): list with length == number of columns - intercept + exclude_polys (bool): whether to skip checking of polynomial terms (i.e. intercept, trends, basis functions); default True """ assert self.shape[1] > 1, "Can't compute vif with only 1 column!" - if self.polys: + if self.polys and exclude_polys: out = self.drop(self.polys,axis=1) else: - out = self[self.columns] - + # Always drop intercept before computing VIF + intercepts = [elem for elem in self.columns if 'poly_0' in str(elem)] + out = self.drop(intercepts,axis=1) try: return np.diag(np.linalg.inv(out.corr()), 0) except np.linalg.LinAlgError: print("ERROR: Cannot compute vifs! Design Matrix is singular because it has some perfectly correlated or duplicated columns. Using .clean() method may help.") - def heatmap(self, figsize=(8, 6), **kwargs): """Visualize Design Matrix spm style. Use .plot() for typical pandas plotting functionality. Can pass optional keyword args to seaborn heatmap. """ + cmap = kwargs.pop('cmap','gray') fig, ax = plt.subplots(1, figsize=figsize) - ax = sns.heatmap(self, cmap='gray', cbar=False, ax=ax, **kwargs) + ax = sns.heatmap(self, cmap=cmap, cbar=False, ax=ax, **kwargs) for _, spine in ax.spines.items(): spine.set_visible(True) for i, label in enumerate(ax.get_yticklabels()): @@ -291,28 +476,27 @@ def convolve(self, conv_func='hrf', columns=None): """Perform convolution using an arbitrary function. Args: - conv_func (ndarray or string): either a 1d numpy array containing output of a function that you want to convolve; a samples by kernel 2d array of several kernels to convolve; or th string 'hrf' which defaults to a glover HRF function at the Design_matrix's sampling_rate + conv_func (ndarray or string): either a 1d numpy array containing output of a function that you want to convolve; a samples by kernel 2d array of several kernels to convolve; or the string 'hrf' which defaults to a glover HRF function at the Design_matrix's sampling_freq columns (list): what columns to perform convolution on; defaults - to all skipping intercept, and columns containing 'poly' or 'cosine' + to all non-polynomial columns """ - assert self.sampling_rate is not None, "Design_matrix has no sampling_rate set!" + assert self.sampling_freq is not None, "Design_matrix has no sampling_freq set!" if columns is None: - columns = [col for col in self.columns if 'intercept' not in col and 'poly' not in col and 'cosine' not in col] + columns = [col for col in self.columns if col not in self.polys] nonConvolved = [col for col in self.columns if col not in columns] - if isinstance(conv_func,six.string_types): + if isinstance(conv_func, np.ndarray): + assert len(conv_func.shape) <= 2, "2d conv_func must be formatted as samples X kernels!" + elif isinstance(conv_func, six.string_types): assert conv_func == 'hrf',"Did you mean 'hrf'? 'hrf' can generate a kernel for you, otherwise custom kernels should be passed in as 1d or 2d arrays." - - assert self.sampling_rate is not None, "Design_matrix sampling rate not set. Can't figure out how to generate HRF!" - conv_func = glover_hrf(self.sampling_rate, oversampling=1) + conv_func = glover_hrf(1. / self.sampling_freq, oversampling=1.) else: - assert type(conv_func) == np.ndarray, 'Must provide a function for convolution!'
+ raise TypeError("conv_func must be a 1d or 2d numpy array organized as samples x kernels, or the string 'hrf' for the canonical glover hrf") if len(conv_func.shape) > 1: - assert conv_func.shape[0] > conv_func.shape[1], '2d conv_func must be formatted as, samples X kernels!' conv_mats = [] for i in range(conv_func.shape[1]): c = self[columns].apply(lambda x: np.convolve(x, conv_func[:,i])[:self.shape[0]]) @@ -334,19 +518,18 @@ def downsample(self, target,**kwargs): design matrix. Args: - target(float): downsampling target, typically in samples not - seconds + target(float): desired frequency in hz kwargs: additional inputs to nltools.stats.downsample """ - if target < self.sampling_rate: + if target > self.sampling_freq: raise ValueError("Target must be lower than current sampling frequency") - df = Design_Matrix(downsample(self, sampling_freq=1./self.sampling_rate, target=target,target_type='seconds', **kwargs)) + df = Design_Matrix(downsample(self, sampling_freq=self.sampling_freq, target=target,target_type='hz', **kwargs)) # convert df to a design matrix newMat = self._inherit_attributes(df) - newMat.sampling_rate = target + newMat.sampling_freq = target return newMat def upsample(self, target,**kwargs): @@ -355,19 +538,18 @@ design matrix. Args: - target(float): downsampling target, typically in samples not - seconds + target(float): desired frequency in hz kwargs: additional inputs to nltools.stats.downsample """ - if target > self.sampling_rate: + if target < self.sampling_freq: raise ValueError("Target must be higher than current sampling frequency") - df = Design_Matrix(upsample(self, sampling_freq=1./self.sampling_rate, target=target, target_type='seconds',**kwargs)) + df = Design_Matrix(upsample(self, sampling_freq=self.sampling_freq, target=target, target_type='hz',**kwargs)) # convert df to a design matrix newMat = self._inherit_attributes(df) - newMat.sampling_rate = target + newMat.sampling_freq = target return newMat def zscore(self, columns=[]): @@ -391,7 +573,7 @@ return newMat def add_poly(self, order=0, include_lower=True): - """Add nth order polynomial terms as columns to design matrix. + """Add nth order Legendre polynomial terms as columns to design matrix. Good for adding constant/intercept to model (order = 0) and accounting for slow-frequency nuisance artifacts e.g. linear, quadratic, etc. drifts. Care is recommended when using this with `.add_dct_basis()` as some columns will be highly correlated. Args: order (int): what order terms to add; 0 = constant/intercept @@ -402,36 +584,28 @@ if order < 0: raise ValueError("Order must be 0 or greater") + if self.polys: + if any([elem.count('_') == 2 for elem in self.polys]): + raise AmbiguityError("It appears that this Design Matrix contains polynomial terms that were kept separate from a previous append operation. This makes it ambiguous for adding polynomial terms. Try calling .add_poly() on each separate Design Matrix before appending them instead.") + polyDict = {} + # Normal/canonical legendre polynomials on the range -1,1 but with size defined by number of observations; keeps all polynomials on similar scales (i.e.
big polys don't blow up) and betas are better behaved + norm_order = np.linspace(-1,1,self.shape[0]) - if order == 0 and 'intercept' in self.polys: - print("Design Matrix already has intercept...skipping") - return self - elif 'poly_'+str(order) in self.polys: + if 'poly_'+str(order) in self.polys: print("Design Matrix already has {}th order polynomial...skipping".format(order)) return self if include_lower: for i in range(0, order+1): - if i == 0: - if 'intercept' in self.polys: print("Design Matrix already has intercept...skipping") - else: - polyDict['intercept'] = np.repeat(1, self.shape[0]) + if 'poly_'+str(i) in self.polys: + print("Design Matrix already has {}th order polynomial...skipping".format(i)) else: - if 'poly_'+str(i) in self.polys: - print("Design Matrix already has {}th order polynomial...skipping".format(i)) - else: - # Unit scale polynomial terms so they don't blow up - vals = np.arange(self.shape[0]) - vals = (vals - np.mean(vals)) / np.std(vals) - polyDict['poly_' + str(i)] = vals ** i + polyDict['poly_' + str(i)] = legendre(i)(norm_order) else: - if order == 0: - polyDict['intercept'] = np.repeat(1, self.shape[0]) - else: - polyDict['poly_'+str(order)] = (range(self.shape[0]) - np.mean(range(self.shape[0])))**order + polyDict['poly_' + str(order)] = legendre(order)(norm_order) - toAdd = Design_Matrix(polyDict,sampling_rate=self.sampling_rate) + toAdd = Design_Matrix(polyDict,sampling_freq=self.sampling_freq) out = self.append(toAdd, axis=1) if out.polys: new_polys = out.polys + list(polyDict.keys()) @@ -440,20 +614,26 @@ out.polys = list(polyDict.keys()) return out - def add_dct_basis(self,duration=180): - """Adds cosine basis functions to Design_Matrix columns, + def add_dct_basis(self,duration=180,drop=0): + """Adds unit scaled cosine basis functions to Design_Matrix columns, based on spm-style discrete cosine transform for use in - high-pass filtering. + high-pass filtering. Does not add intercept/constant. Care is recommended if using this along with `.add_poly()`, as some columns will be highly-correlated. Args: duration (int): length of filter in seconds + drop (int): index of which early/slow bases to drop if any; will always drop constant (i.e. intercept) like SPM. Unlike SPM, retains first basis (i.e. linear/sigmoidal). Will cumulatively drop bases up to and inclusive of index provided (e.g. 2, drops bases 1 and 2); default 0 """ - assert self.sampling_rate is not None, "Design_Matrix has no sampling_rate set!" - basis_mat = make_cosine_basis(self.shape[0],self.sampling_rate,duration) + assert self.sampling_freq is not None, "Design_Matrix has no sampling_freq set!" + + if self.polys: + if any([elem.count('_') == 2 and 'cosine' in elem for elem in self.polys]): + raise AmbiguityError("It appears that this Design Matrix contains cosine bases that were kept separate from a previous append operation. This makes it ambiguous for adding cosine basis terms.
Try calling .add_dct_basis() on each separate Design Matrix before appending them instead.") + + basis_mat = make_cosine_basis(self.shape[0],1./self.sampling_freq,duration,drop=drop) basis_frame = Design_Matrix(basis_mat, - sampling_rate=self.sampling_rate) + sampling_freq=self.sampling_freq,columns = [str(elem) for elem in range(basis_mat.shape[1])]) basis_frame.columns = ['cosine_'+str(i+1) for i in range(basis_frame.shape[1])] @@ -494,13 +674,14 @@ def replace_data(self,data,column_names=None): else: raise TypeError("New data must be numpy array, pandas DataFrame or python dictionary type") - def clean(self,fill_na=0,exclude_polys=False,verbose=True): + def clean(self,fill_na=0,exclude_polys=False,thresh=.95,verbose=True): """ - Method to fill NaNs in Design Matrix and remove duplicate columns based on data values, NOT names. Columns are dropped if they cause the Design Matrix to become singular i.e. are perfectly correlated. In this case, only the first instance of that column will be retained and all others will be dropped. + Method to fill NaNs in Design Matrix and remove duplicate columns based on data values, NOT names. Columns are dropped if they are correlated >= the requested threshold (default = .95). In this case, only the first instance of that column will be retained and all others will be dropped. Args: fill_na (str/int/float): value to fill NaNs with set to None to retain NaNs; default 0 exclude_polys (bool): whether to skip checking of polynomial terms (i.e. intercept, trends, basis functions); default False + thresh (float): correlation threshold to use to drop redundant columns; default .95 verbose (bool): print what column names were dropped; default True """ @@ -518,15 +699,17 @@ for i, c in out.iteritems(): for j, c2 in out.iteritems(): if i != j: - r = pearsonr(c,c2)[0] - if (r > 0.99) and (j not in keep) and (j not in remove): + r = np.abs(pearsonr(c,c2)[0]) + if (r >= thresh) and (j not in keep) and (j not in remove): + if verbose: + print("{} and {} correlated at {} which is >= threshold of {}. Dropping {}".format(i,j,np.round(r,2),thresh,j)) keep.append(i) remove.append(j) if remove: out = out.drop(remove, axis=1) + out.polys = [elem for elem in out.polys if elem not in remove] + out = out._sort_cols() else: print("Dropping columns not needed...skipping") - if verbose: - print("Dropping columns: ", remove) np.seterr(**old_settings) return out diff --git a/nltools/file_reader.py b/nltools/file_reader.py index e3630e3f..8d809e34 100644 --- a/nltools/file_reader.py +++ b/nltools/file_reader.py @@ -15,11 +15,11 @@ import warnings -def onsets_to_dm(F, TR, runLength, header='infer', sort=False, keep_separate=True, +def onsets_to_dm(F, sampling_freq, run_length, header='infer', sort=False, keep_separate=True, add_poly=None, unique_cols=[], fill_na=None, **kwargs): """ - This function can assist in reading in one or several in a 2-3 column onsets files, specified in seconds and converting it to a Design Matrix organized as TRs X Stimulus Classes. Onsets files **must** be organized with columns in one of the following 4 formats: + This function can assist in reading in one or several 2-3 column onsets files, specified in seconds, and converting them to a Design Matrix organized as samples X Stimulus Classes.
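Against the renamed signature, a minimal sketch (timings are hypothetical; a DataFrame in the 'Stim, Onset' layout described below stands in for a file path):

```python
import pandas as pd
from nltools.file_reader import onsets_to_dm

onsets = pd.DataFrame({'Stim': ['face', 'house', 'face'],
                       'Onset': [0., 10., 20.]})  # onsets in seconds
dm = onsets_to_dm(onsets, sampling_freq=1. / 2.0, run_length=100)
assert dm.shape[0] == 100  # samples x stimulus classes
```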
Onsets files **must** be organized with columns in one of the following 4 formats: 1) 'Stim, Onset' 2) 'Onset, Stim' @@ -30,9 +30,7 @@ def onsets_to_dm(F, TR, runLength, header='infer', sort=False, keep_separate=Tru Args: F (filepath/DataFrame/list): path to file, pandas dataframe, or list of files or pandas dataframes - df (str or dataframe): path to file or pandas dataframe - TR (float): length of TR in seconds the run was collected at - runLength (int): number of TRs in the run these onsets came from + sampling_freq (float): sampling frequency in hertz; for TRs use (1 / TR) run_length (int): number of TRs in the run these onsets came from sort (bool, optional): whether to sort the columns of the resulting design matrix alphabetically; defaults to False @@ -52,6 +50,7 @@ def onsets_to_dm(F, TR, runLength, header='infer', sort=False, keep_separate=Tru F = [F] out = [] + TR = 1. / sampling_freq for f in F: if isinstance(f,six.string_types): df = pd.read_csv(f, header=header,**kwargs) @@ -76,7 +75,7 @@ def onsets_to_dm(F, TR, runLength, header='infer', sort=False, keep_separate=Tru df['Onset'] = df['Onset'].apply(lambda x: int(np.floor(x/TR))) #Build dummy codes - X = Design_Matrix(np.zeros([runLength,len(df['Stim'].unique())]),columns=df['Stim'].unique(),sampling_rate=TR) + X = Design_Matrix(np.zeros([run_length,len(df['Stim'].unique())]),columns=df['Stim'].unique(),sampling_freq=sampling_freq) for i, row in df.iterrows(): if df.shape[1] == 3: dur = np.ceil(row['Duration']/TR) @@ -90,6 +89,9 @@ def onsets_to_dm(F, TR, runLength, header='infer', sort=False, keep_separate=Tru if len(out) > 1: out_dm = out[0].append(out[1:],keep_separate = keep_separate, add_poly=add_poly, unique_cols=unique_cols,fill_na=fill_na) else: - out_dm = out[0].add_poly(add_poly) + if add_poly is not None: + out_dm = out[0].add_poly(add_poly) + else: + out_dm = out[0] return out_dm diff --git a/nltools/stats.py b/nltools/stats.py index 75da2438..0fa0517e 100644 --- a/nltools/stats.py +++ b/nltools/stats.py @@ -49,8 +49,8 @@ MAX_INT = np.iinfo(np.int32).max # Optional dependencies -ARMA = attempt_to_import('statsmodels.tsa.arima_model',name='ARMA', - fromlist=['ARMA']) +sm = attempt_to_import('statsmodels.tsa.arima_model',name='sm') + def pearson(x, y): """ Correlates row vector x with each row vector in 2D array y. 
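The stats-level helper behind add_dct_basis is diffed next; assuming the new keyword set, a direct call might look like this (values hypothetical):

```python
from nltools.stats import make_cosine_basis

# 200 samples with a 180 s filter length
basis = make_cosine_basis(nsamples=200, sampling_freq=1. / 2.0,
                          filter_length=180, unit_scale=True, drop=0)
assert basis.shape[0] == 200  # column count follows the SPM order formula
```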
From neurosynth.stats.py - author: Tal Yarkoni @@ -301,7 +301,7 @@ Args: data: Pandas DataFrame or Series - sampling_freq: Sampling frequency of data + sampling_freq: Sampling frequency of data in hertz target: downsampling target target_type: type of target can be [samples,seconds,hz] method: (str) type of downsample method ['mean','median'], @@ -340,7 +340,7 @@ Args: data: Pandas Series or DataFrame (Note: will drop non-numeric columns from DataFrame) - sampling_freq: Sampling frequency of data + sampling_freq: Sampling frequency of data in hertz target: downsampling target target_type: type of target can be [samples,seconds,hz] method (str):'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic' where 'zero', 'slinear', 'quadratic' and 'cubic' refer to a spline interpolation of zeroth, first, second or third order default: linear @@ -523,8 +523,8 @@ def correlation_permutation(data1, data2, n_permute=5000, metric='spearman', stats['p'] = _calc_pvalue(all_p,stats['correlation'],tail) return stats -def make_cosine_basis(nsamples, sampling_rate, filter_length, drop=0): - """ Create a series of cosines basic functions for discrete cosine +def make_cosine_basis(nsamples, sampling_freq, filter_length, unit_scale=True, drop=0): + """ Create a series of cosine basis functions for a discrete cosine transform. Based off of implementation in spm_filter and spm_dctmtx because scipy dct can only apply transforms but not return the basis functions. Like SPM, does not add constant (i.e. intercept), but does @@ -532,12 +532,13 @@ Args: nsamples (int): number of observations (e.g. TRs) - sampling_freq (float): sampling rate in seconds (e.g. TR length) + sampling_freq (float): sampling frequency in hertz (i.e. 1 / TR) filter_length (int): length of filter in seconds + unit_scale (bool): ensure that the basis functions are on the normalized range [-1, 1]; default True drop (int): index of which early/slow bases to drop if any; default is to drop constant (i.e. intercept) like SPM. Unlike SPM, retains first basis (i.e. linear/sigmoidal). Will cumulatively drop bases - up to and inclusive of index provided (e.g. 2, drops bases 0,1,2) + up to and inclusive of index provided (e.g. 2, drops bases 1 and 2) Returns: out (ndarray): nsamples x number of basis sets numpy array @@ -545,7 +546,7 @@ #Figure out number of basis functions to create - order = int(np.fix(2 * (nsamples * sampling_rate)/filter_length + 1)) + order = int(np.fix(2 * (nsamples * sampling_freq)/filter_length + 1)) n = np.arange(nsamples) @@ -559,14 +560,18 @@ for i in range(1,order): C[:,i] = np.sqrt(2./nsamples) * np.cos(np.pi*(2*n+1) * i/(2*nsamples)) - #Drop desired bases - drop += 1 - C = C[:, drop:] + # Drop intercept ala SPM + C = C[:,1:] + if C.size == 0: - raise ValueError('Basis function creation failed! nsamples is too small' - 'for requested filter_length.') - else: - return C + raise ValueError('Basis function creation failed! nsamples is too small for requested filter_length.') + + if unit_scale: + C *= 1.
/ C[0,0] + + C = C[:, drop:] + + return C def transform_pairwise(X, y): '''Transforms data into pairs with balanced labels for ranking @@ -705,6 +710,33 @@ def summarize_bootstrap(data, save_weights=False): output['samples'] = data return output +def _arma_func(X,Y,idx=None,**kwargs): + + """ + Fit an ARMA(p,q) model. If Y is a matrix and not a vector, expects an idx argument that refers to columns of Y. Used by regress(). + """ + method = kwargs.pop('method','css-mle') + order = kwargs.pop('order',(1,1)) + + maxiter = kwargs.pop('maxiter',50) + disp = kwargs.pop('disp',-1) + start_ar_lags = kwargs.pop('start_ar_lags',order[0]+1) + transparams = kwargs.pop('transparams',False) + trend = kwargs.pop('trend','nc') + + if len(Y.shape) == 2: + model = sm.tsa.arima_model.ARMA(endog=Y[:,idx],exog=X.values,order=order) + else: + model = sm.tsa.arima_model.ARMA(endog=Y,exog=X.values,order=order) + try: + res = model.fit(trend=trend,method=method,transparams=transparams, + maxiter=maxiter,disp=disp,start_ar_lags=start_ar_lags,**kwargs) + except: + res = model.fit(trend=trend,method=method,transparams=transparams, + maxiter=maxiter,disp=disp,start_ar_lags=start_ar_lags,start_params=np.repeat(1.,X.shape[1]+2)) + + return (res.params[:-2], res.tvalues[:-2],res.pvalues[:-2],res.df_resid, res.resid) + def regress(X,Y,mode='ols',**kwargs): """ This is a flexible function to run several types of regression models provided X and Y numpy arrays. Y can be a 1d numpy array or 2d numpy array. In the latter case, results will be output with shape 1 x Y.shape[1], in other words fitting a separate regression model to each column of Y. @@ -791,36 +823,12 @@ def regress(X,Y,mode='ols',**kwargs): max_nbytes = kwargs.pop('max_nbytes',1e8) verbose = kwargs.pop('verbose',0) - # Use function scope to get X and Y - def _arma_func(idx=None,**kwargs): - - """ - Fit an ARMA(p,q) model. If Y is a matrix and not a vector, expects an idx argument that refers to columns of Y. 
- """ - method = kwargs.pop('method','css-mle') - order = kwargs.pop('order',(1,1)) - - maxiter = kwargs.pop('maxiter',50) - disp = kwargs.pop('disp',-1) - start_ar_lags = kwargs.pop('start_ar_lags',order[0]+1) - transparams = kwargs.pop('transparams',False) - trend = kwargs.pop('trend','nc') - - if len(Y.shape) == 2: - model = ARMA(endog=Y[:,idx],exog=X,order=order) - else: - model = ARMA(endog=Y,exog=X,order=order) - res = model.fit(trend=trend,method=method,transparams=transparams, - maxiter=maxiter,disp=disp,start_ar_lags=start_ar_lags) - - return (res.params[:-2], res.tvalues[:-2],res.pvalues[:-2],res.df_resid, res.resid) - # Parallelize if Y vector contains more than 1 column if len(Y.shape) == 2: if backend == 'threading' and n_jobs == -1: n_jobs = 10 par_for = Parallel(n_jobs=n_jobs,verbose=verbose,backend=backend,max_nbytes=max_nbytes) - out_arma = par_for(delayed(_arma_func)(idx=i,**kwargs) for i in range(Y.shape[-1])) + out_arma = par_for(delayed(_arma_func)(X,Y,idx=i,**kwargs) for i in range(Y.shape[-1])) b = np.column_stack([elem[0] for elem in out_arma]) t = np.column_stack([elem[1] for elem in out_arma]) @@ -829,7 +837,7 @@ def _arma_func(idx=None,**kwargs): res = np.column_stack([elem[4] for elem in out_arma]) else: - b,t,p,df,res = _arma_func() + b,t,p,df,res = _arma_func(X,Y,**kwargs) return b, t, p, df, res diff --git a/nltools/tests/test_data.py b/nltools/tests/test_data.py index 2c142848..e2fab982 100644 --- a/nltools/tests/test_data.py +++ b/nltools/tests/test_data.py @@ -10,6 +10,7 @@ Design_Matrix) from nltools.stats import threshold, align from nltools.mask import create_sphere +from nltools.external.hrf import glover_hrf from sklearn.metrics import pairwise_distances import matplotlib import matplotlib.pyplot as plt @@ -604,93 +605,83 @@ def test_groupby(tmpdir): def test_designmat(tmpdir): - mat1 = Design_Matrix({ - 'X':[1, 4, 2, 7, 5, 9, 2, 1, 3, 2], - 'Y':[3, 0, 0, 6, 9, 9, 10, 10, 1, 10], - 'Z':[2, 2, 2, 2, 7, 0, 1, 3, 3, 2], - 'intercept':[1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - }, - sampling_rate=2.0,polys=['intercept']) - - mat2 = Design_Matrix({ - 'X':[9, 9, 2, 7, 5, 0, 1, 1, 1, 2], - 'Y':[3, 3, 3, 6, 9, 0, 1, 10, 1, 10], - 'Z':[2, 6, 3, 2, 7, 0, 1, 7, 8, 8], - 'intercept':[1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - }, - sampling_rate=2.0, polys=['intercept']) + # Design matrices are specified in terms of sampling frequency + TR = 2.0 + sampling_freq = 1. 
/ TR - # Appending - # Basic horz cat - new_mat = mat1.append(mat1,axis=1) - assert new_mat.shape == (mat1.shape[0], mat1.shape[1] + mat2.shape[1]) - both_cols = list(mat1.columns) + list(mat1.columns) - assert all(new_mat.columns == both_cols) - # Basic vert cat - new_mat = mat1.append(mat1,axis=0) - assert new_mat.shape == (mat1.shape[0]*2, mat1.shape[1]+1) - # Advanced vert cat - new_mat = mat1.append(mat1,axis=0,keep_separate=False) - assert new_mat.shape == (mat1.shape[0]*2,mat1.shape[1]) - # More advanced vert cat - new_mat = mat1.append(mat1,axis=0,add_poly=2) - assert new_mat.shape == (mat1.shape[0]*2, 9) - - #convolution doesn't affect intercept - assert all(mat1.convolve().iloc[:, -1] == mat1.iloc[:, -1]) - #but it still works - assert (mat1.convolve().iloc[:, :3].values != mat1.iloc[:, :3].values).any() - - #Test vifs - expectedVifs = np.array([ 1.03984251, 1.02889877, 1.02261945]) - assert np.allclose(expectedVifs,mat1.vif()) - - #poly - mat1.add_poly(order=4).shape[1] == mat1.shape[1]+4 - mat1.add_poly(order=4, include_lower=False).shape[1] == mat1.shape[1]+1 - - #zscore - z = mat1.zscore(columns=['X', 'Z']) - assert (z['Y'] == mat1['Y']).all() - assert z.shape == mat1.shape - - # clean - mat = Design_Matrix({ - 'X':[1, 4, 2, 7, 5, 9, 2, 1, 3, 2], - 'A':[1, 4, 2, 7, 5, 9, 2, 1, 3, 2], - 'Y':[3, 0, 0, 6, 9, 9, 10, 10, 1, 10], - 'Z':[2, 2, 2, 2, 7, 0, 1, 3, 3, 2], - 'C':[1, 4, 2, 7, 5, 9, 2, 1, 3, 2], - 'intercept':[1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - }, - sampling_rate=2.0,polys=['intercept']) - mat = mat[['X','A','Y','Z','C','intercept']] - assert all(mat.clean().columns == ['X','Y','Z','intercept']) + mat = Design_Matrix(np.random.randint(2,size=(500,4)),columns=['face_A','face_B','house_A','house_B'],sampling_freq=sampling_freq) + + # Basic ops + matp = mat.add_poly(2) + assert matp.shape[1] == 7 + assert mat.add_poly(2,include_lower=False).shape[1] == 5 + + matpd = matp.add_dct_basis() + assert matpd.shape[1] == 18 + + assert all(matpd.vif() < 2.0) + assert not all(matpd.vif(exclude_polys=False) < 2.0) + + matc = matpd.clean() + assert matc.shape[1] == 16 + + # Standard convolve + assert matpd.convolve().shape == matpd.shape + # Multi-kernel convolve + hrf = glover_hrf(TR,oversampling=1.) + assert matpd.convolve(conv_func=np.column_stack([hrf,hrf])).shape[1] == matpd.shape[1] + 4 + + matz = mat.zscore(columns = ['face_A','face_B']) + assert (matz[['house_A','house_B']] == mat[['house_A','house_B']]).all().all() # replace data - mat = Design_Matrix({ - 'X':[1, 4, 2, 7, 5, 9, 2, 1, 3, 2], - 'A':[1, 4, 2, 7, 5, 9, 2, 1, 3, 2], - 'Y':[3, 0, 0, 6, 9, 9, 10, 10, 1, 10], - 'Z':[2, 2, 2, 2, 7, 0, 1, 3, 3, 2], - 'C':[1, 4, 2, 7, 5, 9, 2, 1, 3, 2] - }, - sampling_rate=2.0) - - mat = mat.replace_data(np.ones((mat.shape[0],mat.shape[1]-1)),column_names=['a','b','c','d']) - - assert(np.allclose(mat.values,1)) - assert(all(mat.columns == ['a','b','c','d'])) - - #DCT basis_mat - mat = Design_Matrix(np.random.randint(2,size=(500,3)),sampling_rate=2.0) - mat = mat.add_dct_basis() - assert len(mat.polys) == 11 - assert mat.shape[1] == 14 + assert matpd.replace_data(np.zeros((500,4))).shape == matpd.shape #Up and down sampling - mat = Design_Matrix(np.random.randint(2,size=(500,4)),sampling_rate=2.0,columns=['a','b','c','d']) - target = 1 - assert mat.upsample(target).shape[0] == mat.shape[0]*2 - target*2 - target = 4 - assert mat.downsample(target).shape[0] == mat.shape[0]/2 + newTR = 1. + target = 1./newTR + assert matpd.upsample(target).shape[0] == matpd.shape[0]*2 - target*2 + newTR = 4.
+ target = 1./newTR + assert mat.downsample(target).shape[0] == matpd.shape[0]/2 + + # Appending + mats = matpd.append(matpd) + assert mats.shape[0] == mat.shape[0] * 2 + # Keep polys separate by default + assert (mats.shape[1] - 4) == (matpd.shape[1] - 4) * 2 + # Otherwise stack them + assert matpd.append(matpd,keep_separate=False).shape[1] == matpd.shape[1] + # Keep a single stimulus column separate + assert matpd.append(matpd,unique_cols=['face_A']).shape[1] == 33 + # Keep a common stimulus class separate + assert matpd.append(matpd,unique_cols=['face*']).shape[1] == 34 + # Keep a common stimulus class and a different single stim separate + assert matpd.append(matpd,unique_cols=['face*','house_A']).shape[1] == 35 + # Keep multiple stimulus classes separate + assert matpd.append(matpd,unique_cols=['face*','house*']).shape[1] == 36 + + # Growing a multi-run design matrix; keeping things separate + num_runs = 4 + all_runs = Design_Matrix(sampling_freq=.5) + for i in range(num_runs): + run = Design_Matrix(np.array([ + [1,0,0,0], + [1,0,0,0], + [0,0,0,0], + [0,1,0,0], + [0,1,0,0], + [0,0,0,0], + [0,0,1,0], + [0,0,1,0], + [0,0,0,0], + [0,0,0,1], + [0,0,0,1] + ]), + sampling_freq = .5, + columns=['stim_A','stim_B','cond_C','cond_D'] + ) + run = run.add_poly(2) + all_runs = all_runs.append(run,unique_cols=['stim*','cond*']) + + assert all_runs.shape == (44, 28) diff --git a/nltools/utils.py b/nltools/utils.py index 25943169..a9dee000 100644 --- a/nltools/utils.py +++ b/nltools/utils.py @@ -205,3 +205,6 @@ def _bootstrap_apply_func(data, function, random_state=None, *args, **kwargs): size=len(data_row_id), replace=True)] return getattr(new_dat, function)( *args, **kwargs) + +class AmbiguityError(Exception): + pass
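Finally, the new AmbiguityError gives the guarded add_poly/add_dct_basis paths an explicit, catchable failure mode; a sketch of when it fires (names invented for illustration):

```python
import numpy as np
from nltools.data import Design_Matrix
from nltools.utils import AmbiguityError

run = Design_Matrix(np.ones((10, 1)), columns=['stim'],
                    sampling_freq=.5).add_poly(1)
both = run.append(run)  # polys get renamed to '0_poly_*' and '1_poly_*'

try:
    both.add_poly(2)  # ambiguous: which run should own the new trends?
except AmbiguityError:
    pass  # instead, add polynomials to each run and then append
```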