diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index d65a6ea..1334db3 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -13,10 +13,10 @@ jobs:
- uses: actions/checkout@v3
with:
submodules: 'true'
- - name: Set up Python 3.7
+ - name: Set up Python 3.8
uses: actions/setup-python@v3
with:
- python-version: "3.7"
+ python-version: "3.8"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
diff --git a/.gitignore b/.gitignore
index 02be247..2a92624 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,8 +23,9 @@ htmlcov
.vscode/
.env
-# Local scratch
+# Local data and scratch
.scratch
+example/bids-examples.b2t
# Local environment
.venv
diff --git a/bids2table/__init__.py b/bids2table/__init__.py
index 5953e40..41986b1 100644
--- a/bids2table/__init__.py
+++ b/bids2table/__init__.py
@@ -1,6 +1,20 @@
"""
-Efficiently index large-scale BIDS datasets and derivatives
+Efficiently index and query large-scale BIDS datasets and derivatives.
"""
-from ._bids2table import bids2table # noqa
+# Register elbow extension types
+import elbow.dtypes # noqa
+
+from ._b2t import bids2table
from ._version import __version__, __version_tuple__ # noqa
+from .entities import BIDSEntities, parse_bids_entities
+from .table import BIDSFile, BIDSTable, join_bids_path
+
+__all__ = [
+ "bids2table",
+ "BIDSTable",
+ "BIDSFile",
+ "BIDSEntities",
+ "parse_bids_entities",
+ "join_bids_path",
+]
diff --git a/bids2table/__main__.py b/bids2table/__main__.py
index ca83aad..d1e3e3e 100644
--- a/bids2table/__main__.py
+++ b/bids2table/__main__.py
@@ -57,12 +57,12 @@ def main():
bids2table(
root=args.root,
persistent=True,
- output=args.output,
+ index_path=args.output,
incremental=args.incremental,
overwrite=args.overwrite,
workers=args.workers,
worker_id=args.worker_id,
- return_df=False,
+ return_table=False,
)
diff --git a/bids2table/_bids2table.py b/bids2table/_b2t.py
similarity index 67%
rename from bids2table/_bids2table.py
rename to bids2table/_b2t.py
index 6c12ff4..43be2c4 100644
--- a/bids2table/_bids2table.py
+++ b/bids2table/_b2t.py
@@ -2,13 +2,13 @@
from pathlib import Path
from typing import Optional
-import pandas as pd
from elbow.builders import build_parquet, build_table
from elbow.sources.filesystem import Crawler
from elbow.typing import StrOrPath
from elbow.utils import setup_logging
from bids2table.extractors.bids import extract_bids_subdir
+from bids2table.table import BIDSTable
setup_logging()
@@ -17,21 +17,21 @@ def bids2table(
root: StrOrPath,
*,
persistent: bool = False,
- output: Optional[StrOrPath] = None,
+ index_path: Optional[StrOrPath] = None,
incremental: bool = False,
overwrite: bool = False,
workers: Optional[int] = None,
worker_id: Optional[int] = None,
- return_df: bool = True,
-) -> Optional[pd.DataFrame]:
+ return_table: bool = True,
+) -> Optional[BIDSTable]:
"""
Index a BIDS dataset directory and load as a pandas DataFrame.
Args:
root: path to BIDS dataset
persistent: whether to save index to disk as a Parquet dataset
- output: path to output Parquet dataset directory if `persistent` is
- `True`. Defaults to `root / "index.b2t".
+ index_path: path to BIDS Parquet index to generate or load. Defaults to `root /
+ "index.b2t"`. Index generation requires `persistent=True`.
incremental: update index incrementally with only new or changed files.
overwrite: overwrite previous index.
workers: number of parallel processes. If `None` or 1, run in the main
@@ -40,17 +40,19 @@ def bids2table(
worker_id: optional worker ID to use when scheduling parallel tasks externally.
Specifying the number of workers is required in this case. Incompatible with
overwrite.
- return_df: whether to return the dataframe or just build the persistent index.
+ return_table: whether to return the BIDS table or just build the persistent
+ index.
Returns:
- A DataFrame containing the BIDS Index.
+ A `BIDSTable` representing the indexed dataset(s), or `None` if `return_table`
+ is `False`.
"""
if worker_id is not None and not persistent:
raise ValueError(
"worker_id is only supported when generating a persistent index"
)
- if not (return_df or persistent):
- raise ValueError("persistent and return_df should not both be False")
+ if not (return_table or persistent):
+ raise ValueError("persistent and return_table should not both be False")
root = Path(root).expanduser().resolve()
if not root.is_dir():
@@ -64,31 +66,32 @@ def bids2table(
follow_links=True,
)
- if output is None:
- output = root / "index.b2t"
+ if index_path is None:
+ index_path = root / "index.b2t"
else:
- output = Path(output).expanduser().resolve()
+ index_path = Path(index_path).expanduser().resolve()
stale = overwrite or incremental or worker_id is not None
- if output.exists() and not stale:
- if return_df:
- logging.info("Loading cached index %s", output)
- df = pd.read_parquet(output)
+ if index_path.exists() and not stale:
+ if return_table:
+ logging.info("Loading cached index %s", index_path)
+ tab = BIDSTable.from_parquet(index_path)
else:
- logging.info("Found cached index %s; nothing to do", output)
- df = None
- return df
+ logging.info("Found cached index %s; nothing to do", index_path)
+ tab = None
+ return tab
if not persistent:
logging.info("Building index in memory")
df = build_table(source=source, extract=extract_bids_subdir)
- return df
+ tab = BIDSTable.from_df(df)
+ return tab
logging.info("Building persistent Parquet index")
build_parquet(
source=source,
extract=extract_bids_subdir,
- output=output,
+ output=index_path,
incremental=incremental,
overwrite=overwrite,
workers=workers,
@@ -96,5 +99,5 @@ def bids2table(
path_column="file__file_path",
mtime_column="file__mod_time",
)
- df = pd.read_parquet(output) if return_df else None
- return df
+ tab = BIDSTable.from_parquet(index_path) if return_table else None
+ return tab
diff --git a/bids2table/extractors/entities.py b/bids2table/entities.py
similarity index 68%
rename from bids2table/extractors/entities.py
rename to bids2table/entities.py
index 24c1903..316fcdc 100644
--- a/bids2table/extractors/entities.py
+++ b/bids2table/entities.py
@@ -1,9 +1,14 @@
+"""
+A structured representation for BIDS entities.
+"""
+
import re
import warnings
from dataclasses import asdict, dataclass, field, fields
from functools import lru_cache
from pathlib import Path
-from typing import Any, Callable, Dict, Iterable, Optional, Union
+from types import MappingProxyType
+from typing import Any, Callable, Dict, Iterable, List, Optional, Union
import pandas as pd
from elbow.typing import StrOrPath
@@ -26,6 +31,7 @@
def bids_field(
name: str,
+ display_name: str,
required: bool = False,
allowed_values: Optional[Iterable] = None,
default: Optional[Any] = None,
@@ -35,9 +41,13 @@ def bids_field(
BIDS entity dataclass field.
"""
if allowed_values is not None:
- allowed_values = set(allowed_values)
+ allowed_values = list(allowed_values)
- metadata = dict(name=name, allowed_values=allowed_values)
+ metadata = {
+ "name": name,
+ "display_name": display_name,
+ "allowed_values": allowed_values,
+ }
if required:
fld = field(metadata=metadata)
elif default_factory is not None:
@@ -60,48 +70,72 @@ class BIDSEntities:
https://bids-specification.readthedocs.io/en/stable/appendices/entities.html
"""
- sub: str = bids_field(name="Subject", required=True)
- ses: Optional[str] = bids_field(name="Session")
- sample: Optional[str] = bids_field(name="Sample")
- task: Optional[str] = bids_field(name="Task")
- acq: Optional[str] = bids_field(name="Acquisition")
- ce: Optional[str] = bids_field(name="Contrast Enhancing Agent")
- trc: Optional[str] = bids_field(name="Tracer")
- stain: Optional[str] = bids_field(name="Stain")
- rec: Optional[str] = bids_field(name="Reconstruction")
- dir: Optional[str] = bids_field(name="Phase-Encoding Direction")
- run: Optional[int] = bids_field(name="Run")
- mod: Optional[str] = bids_field(name="Corresponding Modality")
- echo: Optional[int] = bids_field(name="Echo")
- flip: Optional[int] = bids_field(name="Flip Angle")
- inv: Optional[int] = bids_field(name="Inversion Time")
+ sub: str = bids_field(name="subject", display_name="Subject", required=True)
+ ses: Optional[str] = bids_field(name="session", display_name="Session")
+ sample: Optional[str] = bids_field(name="sample", display_name="Sample")
+ task: Optional[str] = bids_field(name="task", display_name="Task")
+ acq: Optional[str] = bids_field(name="acquisition", display_name="Acquisition")
+ ce: Optional[str] = bids_field(
+ name="ceagent", display_name="Contrast Enhancing Agent"
+ )
+ trc: Optional[str] = bids_field(name="tracer", display_name="Tracer")
+ stain: Optional[str] = bids_field(name="stain", display_name="Stain")
+ rec: Optional[str] = bids_field(
+ name="reconstruction", display_name="Reconstruction"
+ )
+ dir: Optional[str] = bids_field(
+ name="direction", display_name="Phase-Encoding Direction"
+ )
+ run: Optional[int] = bids_field(name="run", display_name="Run")
+ mod: Optional[str] = bids_field(
+ name="modality", display_name="Corresponding Modality"
+ )
+ echo: Optional[int] = bids_field(name="echo", display_name="Echo")
+ flip: Optional[int] = bids_field(name="flip", display_name="Flip Angle")
+ inv: Optional[int] = bids_field(name="inversion", display_name="Inversion Time")
mt: Optional[str] = bids_field(
- name="Magnetization Transfer", allowed_values={"on", "off"}
+ name="mtransfer",
+ display_name="Magnetization Transfer",
+ allowed_values={"on", "off"},
)
part: Optional[str] = bids_field(
- name="Part", allowed_values={"mag", "phase", "real", "imag"}
+ name="part",
+ display_name="Part",
+ allowed_values={"mag", "phase", "real", "imag"},
+ )
+ proc: Optional[str] = bids_field(
+ name="processing", display_name="Processed (on device)"
+ )
+ hemi: Optional[str] = bids_field(
+ name="hemisphere", display_name="Hemisphere", allowed_values={"L", "R"}
)
- proc: Optional[str] = bids_field(name="Processed (on device)")
- hemi: Optional[str] = bids_field(name="Hemisphere", allowed_values={"L", "R"})
- space: Optional[str] = bids_field(name="Space")
- split: Optional[int] = bids_field(name="Split")
- recording: Optional[str] = bids_field(name="Recording")
- chunk: Optional[int] = bids_field(name="Chunk")
- atlas: Optional[str] = bids_field(name="Atlas")
- res: Optional[str] = bids_field(name="Resolution")
- den: Optional[str] = bids_field(name="Density")
- label: Optional[str] = bids_field(name="Label")
- desc: Optional[str] = bids_field(name="Description")
+ space: Optional[str] = bids_field(name="space", display_name="Space")
+ split: Optional[int] = bids_field(name="split", display_name="Split")
+ recording: Optional[str] = bids_field(name="recording", display_name="Recording")
+ chunk: Optional[int] = bids_field(name="chunk", display_name="Chunk")
+ atlas: Optional[str] = bids_field(name="atlas", display_name="Atlas")
+ res: Optional[str] = bids_field(name="resolution", display_name="Resolution")
+ den: Optional[str] = bids_field(name="density", display_name="Density")
+ label: Optional[str] = bids_field(name="label", display_name="Label")
+ desc: Optional[str] = bids_field(name="description", display_name="Description")
datatype: Optional[str] = bids_field(
- name="Data type", allowed_values=BIDS_DATATYPES
+ name="datatype", display_name="Data type", allowed_values=BIDS_DATATYPES
)
- suffix: Optional[str] = bids_field(name="Suffix")
- ext: Optional[str] = bids_field(name="Extension")
+ suffix: Optional[str] = bids_field(name="suffix", display_name="Suffix")
+ ext: Optional[str] = bids_field(name="extension", display_name="Extension")
extra_entities: Optional[Dict[str, Union[str, int]]] = bids_field(
- name="Extra entities",
+ name="extra_entities",
+ display_name="Extra entities",
default_factory=dict,
)
+ @staticmethod
+ def special() -> List[str]:
+ """
+ Get list of field keys which are not standard entities.
+ """
+ return ["datatype", "suffix", "ext", "extra_entities"]
+
@classmethod
def from_dict(cls, entities: Dict[str, Any], valid_only: bool = False):
"""
@@ -309,3 +343,8 @@ def parse_bids_entities(path: StrOrPath) -> Dict[str, str]:
if v is not None:
entities[k] = v
return entities
+
+
+ENTITY_NAMES_TO_KEYS = MappingProxyType(
+ {f.metadata["name"]: f.name for f in fields(BIDSEntities)}
+)
diff --git a/bids2table/extractors/__init__.py b/bids2table/extractors/__init__.py
index e69de29..05c15ea 100644
--- a/bids2table/extractors/__init__.py
+++ b/bids2table/extractors/__init__.py
@@ -0,0 +1,3 @@
+"""
+[Elbow](https://github.com/cmi-dair/elbow) extract functions for BIDS datasets.
+"""
diff --git a/bids2table/extractors/bids.py b/bids2table/extractors/bids.py
index da5363d..b2260da 100644
--- a/bids2table/extractors/bids.py
+++ b/bids2table/extractors/bids.py
@@ -7,8 +7,9 @@
from elbow.record import Record, concat
from elbow.typing import StrOrPath
+from bids2table.entities import BIDSEntities
+
from .dataset import extract_dataset
-from .entities import BIDSEntities
from .metadata import extract_metadata, is_associated_sidecar
@@ -31,7 +32,7 @@ def extract_bids_file(path: StrOrPath) -> Optional[Record]:
meta_rec = extract_metadata(path)
file_rec = extract_file_meta(path)
- rec = concat({"ds": dset_rec, "ent": entities, "meta": meta_rec, "file": file_rec})
+ rec = concat({"ds": dset_rec, "ent": entities, "meta": meta_rec, "finfo": file_rec})
return rec
diff --git a/bids2table/extractors/image.py b/bids2table/extractors/image.py
index d1155f7..38aed1e 100644
--- a/bids2table/extractors/image.py
+++ b/bids2table/extractors/image.py
@@ -7,7 +7,7 @@
from elbow.typing import StrOrPath
from nibabel.filebasedimages import ImageFileError
-from .entities import parse_bids_entities
+from bids2table.entities import parse_bids_entities
try:
import nifti
@@ -41,6 +41,9 @@ def extract_image_meta(path: StrOrPath, *, backend: str = "nibabel") -> Record:
def _read_image_meta(
path: str, backend: str = "nibabel"
) -> Tuple[Dict[str, Any], np.ndarray]:
+ header: Dict[str, Any]
+ affine: np.ndarray
+
if backend == "nifti":
if not has_nifti:
raise ModuleNotFoundError("nifti image backend not installed")
@@ -51,7 +54,12 @@ def _read_image_meta(
affine = None
else:
img = nib.load(path)
- header = dict(img.header)
+ if not isinstance(img, nib.Nifti1Image):
+ raise TypeError(
+ f"Found image type {type(img).__name__}; only Nifti1Image supported"
+ )
+
+ header = {k: v for k, v in img.header.items()}
affine = np.asarray(img.affine)
header = {k: _cast_header_value(v) for k, v in header.items()}
diff --git a/bids2table/extractors/_inheritance.py b/bids2table/extractors/inheritance.py
similarity index 97%
rename from bids2table/extractors/_inheritance.py
rename to bids2table/extractors/inheritance.py
index 263d0d8..49a06bd 100644
--- a/bids2table/extractors/_inheritance.py
+++ b/bids2table/extractors/inheritance.py
@@ -4,8 +4,9 @@
from elbow.typing import StrOrPath
+from bids2table.entities import parse_bids_entities
+
from .dataset import is_dataset_root
-from .entities import parse_bids_entities
def find_bids_parents(
diff --git a/bids2table/extractors/metadata.py b/bids2table/extractors/metadata.py
index a0efc86..e7df27d 100644
--- a/bids2table/extractors/metadata.py
+++ b/bids2table/extractors/metadata.py
@@ -6,8 +6,9 @@
from elbow.record import Record
from elbow.typing import StrOrPath
-from ._inheritance import _glob, find_bids_parents
-from .entities import parse_bids_entities
+from bids2table.entities import parse_bids_entities
+
+from .inheritance import _glob, find_bids_parents
def extract_metadata(path: StrOrPath) -> Record:
diff --git a/bids2table/helpers.py b/bids2table/helpers.py
deleted file mode 100644
index 1b6e2eb..0000000
--- a/bids2table/helpers.py
+++ /dev/null
@@ -1,71 +0,0 @@
-from pathlib import Path
-from typing import Any, Dict, Optional, Union
-
-import pandas as pd
-
-from bids2table.extractors.entities import BIDSEntities
-
-
-def join_bids_path(
- row: Union[pd.Series, Dict[str, Any]],
- prefix: Optional[Union[str, Path]] = None,
- valid_only: bool = True,
-) -> Path:
- """
- Reconstruct a BIDS path from a table row/record or entities dict.
-
- Example::
-
- df = pd.read_parquet("dataset.parquet")
- paths = df.apply(join_bids_path, axis=1)
- """
- if "entities" in row:
- row = row["entities"]
-
- if isinstance(row, pd.Series):
- row = row.to_dict()
-
- entities = BIDSEntities.from_dict(row, valid_only=valid_only)
- path = entities.to_path(prefix=prefix, valid_only=valid_only)
- return path
-
-
-def flat_to_multi_columns(df: pd.DataFrame, sep: str = "__") -> pd.DataFrame:
- """
- Convert a flat column index to a MultiIndex by splitting on `sep`.
- """
- # Do nothing if already a MultiIndex
- if isinstance(df.columns, pd.MultiIndex):
- return df
-
- # Do nothing for empty df
- # TODO: It would probably be better if the header was initialized even if there are
- # no records.
- if len(df.columns) == 0:
- return df
-
- split_columns = [col.split(sep) for col in df.columns]
- num_levels = max(map(len, split_columns))
-
- def _pad_col(col):
- return tuple((num_levels - len(col)) * [None] + col)
-
- df = df.copy(deep=False)
- df.columns = pd.MultiIndex.from_tuples(map(_pad_col, split_columns))
- return df
-
-
-def multi_to_flat_columns(df: pd.DataFrame, sep: str = "__") -> pd.DataFrame:
- """
- Convert a column MultiIndex to a flat index by joining on `sep`.
- """
- # Do nothing if already flat
- if not isinstance(df.columns, pd.MultiIndex):
- return df
-
- columns = df.columns.to_flat_index()
- join_columns = [sep.join(col) for col in columns]
-
- df = df.copy(deep=False)
- df.columns = pd.Index(join_columns)
- return df
diff --git a/bids2table/table.py b/bids2table/table.py
new file mode 100644
index 0000000..be7cc8b
--- /dev/null
+++ b/bids2table/table.py
@@ -0,0 +1,434 @@
+from dataclasses import dataclass, field
+from functools import cached_property
+from pathlib import Path
+from typing import Any, Callable, Dict, Iterable, List, Optional, Union
+
+import pandas as pd
+
+from bids2table.entities import ENTITY_NAMES_TO_KEYS, BIDSEntities
+
+
+class BIDSTable(pd.DataFrame):
+ """
+ A table representing one or more BIDS datasets.
+
+ Each row in the table corresponds to a BIDS data file. The table is organized with
+ several groups of columns:
+
+ - **dataset** (`ds`): dataset name, relative dataset path, and the JSON dataset description
+ - **entities** (`ent`): All [valid BIDS entities](https://bids-specification.readthedocs.io/en/stable/appendices/entities.html) plus an `extra_entities` dict containing any extra entities
+ - **metadata** (`meta`): BIDS JSON metadata
+ - **file info** (`finfo`): General file info including the full file path and last modified time
+
+ It's recommended to create a `BIDSTable` using the main `bids2table.bids2table`
+ function or use one of the constructor methods:
+
+ - `BIDSTable.from_df`
+ - `BIDSTable.from_parquet`
+
+ ### Example
+
+ ```python
+ tab = BIDSTable.from_parquet("dataset/index.b2t")
+ tab = tab.sort_entities(["dataset", "sub", "ses", "task", "run"])
+ tab = (
+ tab
+ .filter("dataset", "ds001")
+ .filter("sub", items=["04", "06"])
+ .filter("RepetitionTime", 2.0)
+ )
+ # Get list of BIDSFiles
+ files = tab.files
+ ```
+ """
+
+ @cached_property
+ def nested(self) -> pd.DataFrame:
+ """
+ A copy of the table with column labels organized in a nested
+ [`MultiIndex`](https://pandas.pydata.org/docs/user_guide/advanced.html#hierarchical-indexing-multiindex).
+ """
+ # Cast back to the base class since we no longer have the full BIDS table
+ # structure.
+ return pd.DataFrame(flat_to_multi_columns(self))
+
+ @cached_property
+ def ds(self) -> pd.DataFrame:
+ """
+ The dataset (`ds`) subtable.
+ """
+ return self.nested["ds"]
+
+ @cached_property
+ def ent(self) -> pd.DataFrame:
+ """
+ The entities (`ent`) subtable.
+ """
+ return self.nested["ent"]
+
+ @cached_property
+ def meta(self) -> pd.DataFrame:
+ """
+ The metadata (`meta`) subtable.
+ """
+ return self.nested["meta"]
+
+ @cached_property
+ def finfo(self) -> pd.DataFrame:
+ """
+ The file info (`finfo`) subtable.
+ """
+ return self.nested["finfo"]
+
+ @cached_property
+ def flat(self) -> pd.DataFrame:
+ """
+ A copy of the table with subtable prefixes e.g. `ds__`, `ent__` removed.
+ """
+ return self.nested.droplevel(0, axis=1)
+
+ @cached_property
+ def flat_meta(self) -> pd.DataFrame:
+ """
+ A table of flattened JSON metadata where each top-level metadata field is
+ converted to its own column. Nested values are left unflattened (`max_level=0`).
+
+ See also:
+
+ - [`pd.json_normalize`](https://pandas.pydata.org/docs/reference/api/pandas.json_normalize.html):
+ more general function in pandas.
+ """
+ # Need to replace None with empty dict for max_level=0 to work.
+ metadata = pd.json_normalize(
+ self["meta__json"].map(lambda v: v or {}), max_level=0
+ )
+ metadata.index = self.index
+ return metadata
+
+ @cached_property
+ def files(self) -> List["BIDSFile"]:
+ """
+ Convert the table to a list of structured `BIDSFile`s.
+ """
+
+ def to_dict(val):
+ if pd.isna(val):
+ return {}
+ return dict(val)
+
+ return [
+ BIDSFile(
+ dataset=row["ds"]["dataset"],
+ root=Path(row["ds"]["dataset_path"]),
+ path=Path(row["finfo"]["file_path"]),
+ entities=BIDSEntities.from_dict(row["ent"]),
+ metadata=to_dict(row["meta"]["json"]),
+ )
+ for _, row in self.nested.iterrows()
+ ]
+
+ @cached_property
+ def datatypes(self) -> List[str]:
+ """
+ Get all datatypes present in the table.
+ """
+ return self.ent["datatype"].unique().tolist()
+
+ @cached_property
+ def modalities(self) -> List[str]:
+ """
+ Get all modalities present in the table.
+ """
+ # TODO: Is this the right way to get the modality?
+ return self.ent["mod"].unique().tolist()
+
+ @cached_property
+ def subjects(self) -> List[str]:
+ """
+ Get all unique subjects in the table.
+ """
+ return self.ent["sub"].unique().tolist()
+
+ @cached_property
+ def entities(self) -> List[str]:
+ """
+ Get all entity keys with at least one non-NA entry in the table.
+ """
+ entities = self.ent.dropna(axis=1, how="all").columns.tolist()
+ special = set(BIDSEntities.special())
+ return [key for key in entities if key not in special]
+
+ def filter(
+ self,
+ key: str,
+ value: Optional[Any] = None,
+ *,
+ items: Optional[Iterable[Any]] = None,
+ contains: Optional[str] = None,
+ regex: Optional[str] = None,
+ func: Optional[Callable[[Any], bool]] = None,
+ ) -> "BIDSTable":
+ """
+ Filter the rows of the table.
+
+ Args:
+ key: Column to filter. Can be a metadata field, BIDS entity name, or any
+ unprefixed column label in the `flat` table.
+ value: Keep rows with this exact value.
+ items: Keep rows whose value is in `items`.
+ contains: Keep rows whose value contains `contains` (string only).
+ regex: Keep rows whose value matches `regex` (string only).
+ func: Apply an arbitrary function and keep values that evaluate to `True`.
+
+ Returns:
+ A filtered BIDS table.
+
+ Example::
+ filtered = (
+ tab
+ .filter("dataset", "ds001")
+ .filter("sub", items=["04", "06"])
+ .filter("RepetitionTime", 2.0)
+ )
+ """
+ # NOTE: Should be careful about reinventing a new style of query API. There are
+ # some obvious things this can't do:
+ # - comparison operators <, >, <=, >=
+ # - negation
+ # - combining filters with 'or' instead of 'and'
+ # At the bottom of this rabbit hole are more general query interfaces like those
+ # already implemented in pandas, duckdb, polars. The goal should be not to
+ # create a new one, but to make the 95% of use cases as easy as possible, and
+ # empower users to interact with the underlying table using their more powerful
+ # tool of choice if necessary.
+ if sum(k is not None for k in [value, items, contains, regex, func]) != 1:
+ raise ValueError(
+ "Exactly one of value, items, contains, regex, or func must not be None"
+ )
+
+ try:
+ # JSON metadata field
+ # NOTE: Assuming all JSON metadata field names start with an uppercase letter.
+ if key[:1].isupper():
+ col = self.flat_meta[key]
+ # Long name entity
+ elif key in ENTITY_NAMES_TO_KEYS:
+ col = self.ent[ENTITY_NAMES_TO_KEYS[key]]
+ # Any other unprefixed column
+ else:
+ col = self.flat[key]
+ except KeyError as exc:
+ raise KeyError(
+ f"Invalid key {key}; expected a valid BIDS entity or metadata field "
+ "present in the dataset"
+ ) from exc
+
+ if value is not None:
+ mask = col == value
+ elif items is not None:
+ mask = col.isin(items)
+ elif contains is not None:
+ mask = col.str.contains(contains)
+ elif regex is not None:
+ mask = col.str.match(regex)
+ else:
+ mask = col.apply(func)
+ mask = mask.fillna(False).astype(bool)
+
+ return self.loc[mask]
+
+ def filter_multi(self, **filters) -> "BIDSTable":
+ """
+ Apply multiple filters to the table sequentially.
+
+ Args:
+ filters: A mapping of column labels to queries. Each query can either be
+ a single value for an exact equality check or a `dict` for a more
+ complex query, e.g. `{"items": [1, 2, 3]}`, that's passed through to
+ `filter`.
+
+ Returns:
+ A filtered BIDS table.
+
+ Example::
+ filtered = tab.filter_multi(
+ dataset="ds001",
+ sub={"items": ["04", "06"]},
+ RepetitionTime=2.5,
+ )
+ """
+ tab = self.copy(deep=False)
+
+ for k, query in filters.items():
+ if not isinstance(query, dict):
+ query = {"value": query}
+ tab = tab.filter(k, **query)
+ return tab
+
+ def sort_entities(
+ self, by: Union[str, List[str]], inplace: bool = False
+ ) -> "BIDSTable":
+ """
+ Sort the values of the table by entities.
+
+ Args:
+ by: label or list of labels. Can be `"dataset"` or a short or long entity
+ name.
+ inplace: sort the table in place
+
+ Returns:
+ A sorted BIDS table.
+ """
+ if isinstance(by, str):
+ by = [by]
+
+ # TODO: what about sorting by other columns, e.g. file_path?
+ def add_prefix(k: str):
+ if k == "dataset":
+ k = f"ds__{k}"
+ elif k in ENTITY_NAMES_TO_KEYS:
+ k = f"ent__{ENTITY_NAMES_TO_KEYS[k]}"
+ else:
+ k = f"ent__{k}"
+ return k
+
+ by = [add_prefix(k) for k in by]
+ out = self.sort_values(by, inplace=inplace)
+ if inplace:
+ return self
+ return out
+
+ @classmethod
+ def from_df(cls, df: pd.DataFrame) -> "BIDSTable":
+ """
+ Create a BIDS table from a pandas `DataFrame` generated by `bids2table`.
+ """
+ return cls(df)
+
+ @classmethod
+ def from_parquet(cls, path: Path) -> "BIDSTable":
+ """
+ Read a BIDS table from a Parquet file or dataset directory generated by
+ `bids2table`.
+ """
+ df = pd.read_parquet(path)
+ return cls.from_df(df)
+
+ @property
+ def _constructor(self):
+ # Makes sure that dataframe slices return a subclass instance
+ # https://pandas.pydata.org/docs/development/extending.html#override-constructor-properties
+ return BIDSTable
+
+
+@dataclass
+class BIDSFile:
+ """
+ A structured BIDS file.
+ """
+
+ dataset: str
+ """Parent BIDS dataset."""
+ root: Path
+ """Path to parent dataset."""
+ path: Path
+ """File path."""
+ entities: BIDSEntities
+ """BIDS entities."""
+ metadata: Dict[str, Any] = field(default_factory=dict)
+ """BIDS JSON metadata."""
+
+ @property
+ def relative_path(self) -> Path:
+ """
+ The file path relative to the dataset root.
+ """
+ return self.path.relative_to(self.root)
+
+
+def flat_to_multi_columns(df: pd.DataFrame, sep: str = "__") -> pd.DataFrame:
+ """
+ Convert a flat column index to a MultiIndex by splitting on `sep`.
+ """
+ # Do nothing if already a MultiIndex
+ if isinstance(df.columns, pd.MultiIndex):
+ return df
+
+ # Do nothing for empty df
+ # TODO: It would probably be better if the header was initialized even if there are
+ # no records.
+ if len(df.columns) == 0:
+ return df
+
+ split_columns = [col.split(sep) for col in df.columns]
+ num_levels = max(map(len, split_columns))
+
+ def _pad_col(col):
+ return tuple((num_levels - len(col)) * [None] + col)
+
+ df = df.copy(deep=False)
+ df.columns = pd.MultiIndex.from_tuples(map(_pad_col, split_columns))
+ return df
+
+
+def multi_to_flat_columns(df: pd.DataFrame, sep: str = "__") -> pd.DataFrame:
+ """
+ Convert a column MultiIndex to a flat index by joining on `sep`.
+ """
+ # Do nothing if already flat
+ if not isinstance(df.columns, pd.MultiIndex):
+ return df
+
+ columns = df.columns.to_flat_index()
+ join_columns = [sep.join(col) for col in columns]
+
+ df = df.copy(deep=False)
+ df.columns = pd.Index(join_columns)
+ return df
+
+
+def join_bids_path(
+ row: Union[pd.Series, Dict[str, Any]],
+ prefix: Optional[Union[str, Path]] = None,
+ valid_only: bool = True,
+) -> Path:
+ """
+ Reconstruct a BIDS path from a table row or entities dict.
+
+ Args:
+ row: row from a `BIDSTable` or `BIDSTable.ent` subtable.
+ prefix: output file prefix path.
+ valid_only: only include valid BIDS entities.
+
+ Example::
+
+ tab = BIDSTable.from_parquet("dataset/index.b2t")
+ paths = tab.apply(join_bids_path, axis=1)
+ """
+ # Filter in case input is a row from the raw dataframe and not the entities group.
+ row = _filter_row(row, group="ent")
+ entities = BIDSEntities.from_dict(row, valid_only=valid_only)
+ path = entities.to_path(prefix=prefix, valid_only=valid_only)
+ return path
+
+
+def _filter_row(
+ row: Union[pd.Series, Dict[str, Any]], group: str, sep: str = "__"
+) -> Dict[str, Any]:
+ """
+ Filter a table row for fields from a particular group. Keeps all fields without a
+ group prefix.
+ """
+ prefix = f"{group}{sep}"
+ return {
+ _removeprefix(k, prefix): v
+ for k, v in row.items()
+ if k.startswith(prefix) or sep not in k
+ }
+
+
+def _removeprefix(s: str, prefix: str) -> str:
+ # Same as str.removeprefix(), which was introduced in Python 3.9.
+ if s.startswith(prefix):
+ s = s[len(prefix) :]
+ return s
diff --git a/example/example.ipynb b/example/example.ipynb
index 5544473..d269258 100644
--- a/example/example.ipynb
+++ b/example/example.ipynb
@@ -6,12 +6,9 @@
"metadata": {},
"outputs": [],
"source": [
- "# Required to load columns with extension types, e.g. json type\n",
- "import elbow.dtypes\n",
"import pandas as pd\n",
"\n",
- "from bids2table import bids2table\n",
- "from bids2table.helpers import flat_to_multi_columns"
+ "from bids2table import bids2table"
]
},
{
@@ -45,15 +42,22 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "176it [00:01, 92.58it/s, tot=176, good=176, rec=2245, err=0] \n",
- "197it [00:02, 90.73it/s, tot=197, good=197, rec=2663, err=0] \n",
- "203it [00:02, 93.22it/s, tot=203, good=203, rec=2630, err=0]\n",
- "204it [00:02, 92.01it/s, tot=204, good=204, rec=2728, err=0] \n"
+ "193it [00:00, 318.09it/s, tot=193, good=193, rec=2386, err=0]\n",
+ "172it [00:00, 288.23it/s, tot=172, good=172, rec=2240, err=0]\n",
+ "202it [00:00, 287.97it/s, tot=202, good=202, rec=2828, err=0]\n",
+ "213it [00:00, 300.22it/s, tot=213, good=213, rec=2812, err=0]\n"
]
}
],
"source": [
- "df = bids2table(root=\"../bids-examples\", persistent=True, overwrite=True, workers=4)"
+ "bids2table(\n",
+ " root=\"../bids-examples\",\n",
+ " index_path=\"bids-examples.b2t\",\n",
+ " persistent=True,\n",
+ " overwrite=True,\n",
+ " workers=4,\n",
+ " return_table=False,\n",
+ ")"
]
},
{
@@ -77,16 +81,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "total 1992\n",
- "-rw------- 1 clane staff 248K Aug 4 12:34 part-20230804123438-0003-of-0004.parquet\n",
- "-rw------- 1 clane staff 247K Aug 4 12:34 part-20230804123438-0002-of-0004.parquet\n",
- "-rw------- 1 clane staff 175K Aug 4 12:34 part-20230804123438-0001-of-0004.parquet\n",
- "-rw------- 1 clane staff 161K Aug 4 12:34 part-20230804123438-0000-of-0004.parquet\n"
+ "total 1608\n",
+ "-rw------- 1 clane staff 197K Aug 9 06:17 part-20230809061750-0002-of-0004.parquet\n",
+ "-rw------- 1 clane staff 240K Aug 9 06:17 part-20230809061750-0003-of-0004.parquet\n",
+ "-rw------- 1 clane staff 167K Aug 9 06:17 part-20230809061750-0000-of-0004.parquet\n",
+ "-rw------- 1 clane staff 194K Aug 9 06:17 part-20230809061750-0001-of-0004.parquet\n"
]
}
],
"source": [
- "! ls -lht ../bids-examples/index.b2t"
+ "! ls -lht bids-examples.b2t/"
]
},
{
@@ -103,7 +107,7 @@
"- dataset (`ds__*`): dataset name, relative dataset path, and the JSON dataset description\n",
"- entities (`ent__*`): All [valid BIDS entities](https://bids-specification.readthedocs.io/en/stable/appendices/entities.html) plus an `extra_entities` dict containing any extra entities\n",
"- metadata (`meta__*`): BIDS JSON metadata\n",
- "- file (`file__*`): General file metadata including the full file path and last modified time"
+ "- file info (`finfo__*`): General file info including the full file path and last modified time"
]
},
{
@@ -111,6 +115,13 @@
"execution_count": 5,
"metadata": {},
"outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Shape: (10266, 40)\n"
+ ]
+ },
{
"data": {
"text/html": [
@@ -169,19 +180,19 @@
"
ent__ext | \n",
" ent__extra_entities | \n",
" meta__json | \n",
- " file__file_path | \n",
- " file__link_target | \n",
- " file__mod_time | \n",
+ " finfo__file_path | \n",
+ " finfo__link_target | \n",
+ " finfo__mod_time | \n",
" \n",
" \n",
" \n",
" \n",
" 0 | \n",
- " asl002 | \n",
- " raw | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
- " {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... | \n",
- " Sub103 | \n",
+ " ds002 | \n",
+ " None | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'BIDSVersion': '1.0.0', 'License': 'This data... | \n",
+ " 15 | \n",
" None | \n",
" None | \n",
" None | \n",
@@ -213,18 +224,18 @@
" T1w | \n",
" .nii.gz | \n",
" {} | \n",
- " {'Manufacturer': 'Philips', 'ManufacturersMode... | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
" None | \n",
- " 1.687883e+09 | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " None | \n",
+ " 1.691420e+09 | \n",
"
\n",
" \n",
" 1 | \n",
- " asl002 | \n",
- " raw | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
- " {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... | \n",
- " Sub103 | \n",
+ " ds002 | \n",
+ " None | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'BIDSVersion': '1.0.0', 'License': 'This data... | \n",
+ " 15 | \n",
" None | \n",
" None | \n",
" None | \n",
@@ -252,32 +263,32 @@
" None | \n",
" None | \n",
" None | \n",
- " perf | \n",
- " m0scan | \n",
+ " anat | \n",
+ " inplaneT2 | \n",
" .nii.gz | \n",
" {} | \n",
- " {'Manufacturer': 'Philips', 'ManufacturersMode... | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
" None | \n",
- " 1.687883e+09 | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " None | \n",
+ " 1.691420e+09 | \n",
"
\n",
" \n",
" 2 | \n",
- " asl002 | \n",
- " raw | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
- " {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... | \n",
- " Sub103 | \n",
+ " ds002 | \n",
" None | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'BIDSVersion': '1.0.0', 'License': 'This data... | \n",
+ " 15 | \n",
" None | \n",
" None | \n",
+ " probabilisticclassification | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
- " NaN | \n",
+ " 1.0 | \n",
" None | \n",
" NaN | \n",
" NaN | \n",
@@ -295,74 +306,74 @@
" None | \n",
" None | \n",
" None | \n",
- " perf | \n",
- " asl | \n",
+ " func | \n",
+ " bold | \n",
" .nii.gz | \n",
" {} | \n",
- " {'Manufacturer': 'Philips', 'ManufacturersMode... | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
+ " {'RepetitionTime': 2.0, 'TaskName': 'probabili... | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
" None | \n",
- " 1.687883e+09 | \n",
+ " 1.691420e+09 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " ds__dataset ds__dataset_type \n",
- "0 asl002 raw \\\n",
- "1 asl002 raw \n",
- "2 asl002 raw \n",
- "\n",
- " ds__dataset_path \n",
- "0 /Users/clane/Projects/ScalableQC/code/bids2tab... \\\n",
- "1 /Users/clane/Projects/ScalableQC/code/bids2tab... \n",
- "2 /Users/clane/Projects/ScalableQC/code/bids2tab... \n",
- "\n",
- " ds__dataset_description ent__sub ent__ses \n",
- "0 {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... Sub103 None \\\n",
- "1 {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... Sub103 None \n",
- "2 {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... Sub103 None \n",
- "\n",
- " ent__sample ent__task ent__acq ent__ce ent__trc ent__stain ent__rec \n",
- "0 None None None None None None None \\\n",
- "1 None None None None None None None \n",
- "2 None None None None None None None \n",
- "\n",
- " ent__dir ent__run ent__mod ent__echo ent__flip ent__inv ent__mt \n",
- "0 None NaN None NaN NaN NaN None \\\n",
- "1 None NaN None NaN NaN NaN None \n",
- "2 None NaN None NaN NaN NaN None \n",
- "\n",
- " ent__part ent__proc ent__hemi ent__space ent__split ent__recording \n",
- "0 None None None None NaN None \\\n",
- "1 None None None None NaN None \n",
- "2 None None None None NaN None \n",
- "\n",
- " ent__chunk ent__atlas ent__res ent__den ent__label ent__desc ent__datatype \n",
- "0 NaN None None None None None anat \\\n",
- "1 NaN None None None None None perf \n",
- "2 NaN None None None None None perf \n",
- "\n",
- " ent__suffix ent__ext ent__extra_entities \n",
- "0 T1w .nii.gz {} \\\n",
- "1 m0scan .nii.gz {} \n",
- "2 asl .nii.gz {} \n",
- "\n",
- " meta__json \n",
- "0 {'Manufacturer': 'Philips', 'ManufacturersMode... \\\n",
- "1 {'Manufacturer': 'Philips', 'ManufacturersMode... \n",
- "2 {'Manufacturer': 'Philips', 'ManufacturersMode... \n",
- "\n",
- " file__file_path file__link_target \n",
- "0 /Users/clane/Projects/ScalableQC/code/bids2tab... None \\\n",
- "1 /Users/clane/Projects/ScalableQC/code/bids2tab... None \n",
- "2 /Users/clane/Projects/ScalableQC/code/bids2tab... None \n",
- "\n",
- " file__mod_time \n",
- "0 1.687883e+09 \n",
- "1 1.687883e+09 \n",
- "2 1.687883e+09 "
+ " ds__dataset ds__dataset_type \\\n",
+ "0 ds002 None \n",
+ "1 ds002 None \n",
+ "2 ds002 None \n",
+ "\n",
+ " ds__dataset_path \\\n",
+ "0 /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "1 /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "2 /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "\n",
+ " ds__dataset_description ent__sub ent__ses \\\n",
+ "0 {'BIDSVersion': '1.0.0', 'License': 'This data... 15 None \n",
+ "1 {'BIDSVersion': '1.0.0', 'License': 'This data... 15 None \n",
+ "2 {'BIDSVersion': '1.0.0', 'License': 'This data... 15 None \n",
+ "\n",
+ " ent__sample ent__task ent__acq ent__ce ent__trc \\\n",
+ "0 None None None None None \n",
+ "1 None None None None None \n",
+ "2 None probabilisticclassification None None None \n",
+ "\n",
+ " ent__stain ent__rec ent__dir ent__run ent__mod ent__echo ent__flip \\\n",
+ "0 None None None NaN None NaN NaN \n",
+ "1 None None None NaN None NaN NaN \n",
+ "2 None None None 1.0 None NaN NaN \n",
+ "\n",
+ " ent__inv ent__mt ent__part ent__proc ent__hemi ent__space ent__split \\\n",
+ "0 NaN None None None None None NaN \n",
+ "1 NaN None None None None None NaN \n",
+ "2 NaN None None None None None NaN \n",
+ "\n",
+ " ent__recording ent__chunk ent__atlas ent__res ent__den ent__label \\\n",
+ "0 None NaN None None None None \n",
+ "1 None NaN None None None None \n",
+ "2 None NaN None None None None \n",
+ "\n",
+ " ent__desc ent__datatype ent__suffix ent__ext ent__extra_entities \\\n",
+ "0 None anat T1w .nii.gz {} \n",
+ "1 None anat inplaneT2 .nii.gz {} \n",
+ "2 None func bold .nii.gz {} \n",
+ "\n",
+ " meta__json \\\n",
+ "0 None \n",
+ "1 None \n",
+ "2 {'RepetitionTime': 2.0, 'TaskName': 'probabili... \n",
+ "\n",
+ " finfo__file_path finfo__link_target \\\n",
+ "0 /Users/clane/Projects/B2T/bids2table/bids-exam... None \n",
+ "1 /Users/clane/Projects/B2T/bids2table/bids-exam... None \n",
+ "2 /Users/clane/Projects/B2T/bids2table/bids-exam... None \n",
+ "\n",
+ " finfo__mod_time \n",
+ "0 1.691420e+09 \n",
+ "1 1.691420e+09 \n",
+ "2 1.691420e+09 "
]
},
"execution_count": 5,
@@ -371,22 +382,182 @@
}
],
"source": [
- "df = bids2table(\"../bids-examples\")\n",
- "\n",
- "df.head(3)"
+ "tab = bids2table(\"../bids-examples\", index_path=\"bids-examples.b2t\")\n",
+ "print(\"Shape:\", tab.shape)\n",
+ "tab.head(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "You can also split the columns into a pandas [`MultiIndex`](https://pandas.pydata.org/docs/user_guide/advanced.html) using the helper function `flat_to_multi_columns()`."
+ "Now let's look at the column types.\n",
+ "\n",
+ "> TODO: not all types are preserved when converting parquet to pandas. In particular, string columns are loaded as `object` dtype, and integer columns containing `None` are converted to `float64` with `NaN`."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ds__dataset | \n",
+ " ds__dataset_type | \n",
+ " ds__dataset_path | \n",
+ " ds__dataset_description | \n",
+ " ent__sub | \n",
+ " ent__ses | \n",
+ " ent__sample | \n",
+ " ent__task | \n",
+ " ent__acq | \n",
+ " ent__ce | \n",
+ " ent__trc | \n",
+ " ent__stain | \n",
+ " ent__rec | \n",
+ " ent__dir | \n",
+ " ent__run | \n",
+ " ent__mod | \n",
+ " ent__echo | \n",
+ " ent__flip | \n",
+ " ent__inv | \n",
+ " ent__mt | \n",
+ " ent__part | \n",
+ " ent__proc | \n",
+ " ent__hemi | \n",
+ " ent__space | \n",
+ " ent__split | \n",
+ " ent__recording | \n",
+ " ent__chunk | \n",
+ " ent__atlas | \n",
+ " ent__res | \n",
+ " ent__den | \n",
+ " ent__label | \n",
+ " ent__desc | \n",
+ " ent__datatype | \n",
+ " ent__suffix | \n",
+ " ent__ext | \n",
+ " ent__extra_entities | \n",
+ " meta__json | \n",
+ " finfo__file_path | \n",
+ " finfo__link_target | \n",
+ " finfo__mod_time | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " json | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " float64 | \n",
+ " object | \n",
+ " float64 | \n",
+ " float64 | \n",
+ " float64 | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " float64 | \n",
+ " object | \n",
+ " float64 | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " object | \n",
+ " json | \n",
+ " json | \n",
+ " object | \n",
+ " object | \n",
+ " float64 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ds__dataset ds__dataset_type ds__dataset_path ds__dataset_description \\\n",
+ "0 object object object json \n",
+ "\n",
+ " ent__sub ent__ses ent__sample ent__task ent__acq ent__ce ent__trc \\\n",
+ "0 object object object object object object object \n",
+ "\n",
+ " ent__stain ent__rec ent__dir ent__run ent__mod ent__echo ent__flip ent__inv \\\n",
+ "0 object object object float64 object float64 float64 float64 \n",
+ "\n",
+ " ent__mt ent__part ent__proc ent__hemi ent__space ent__split ent__recording \\\n",
+ "0 object object object object object float64 object \n",
+ "\n",
+ " ent__chunk ent__atlas ent__res ent__den ent__label ent__desc ent__datatype \\\n",
+ "0 float64 object object object object object object \n",
+ "\n",
+ " ent__suffix ent__ext ent__extra_entities meta__json finfo__file_path \\\n",
+ "0 object object json json object \n",
+ "\n",
+ " finfo__link_target finfo__mod_time \n",
+ "0 object float64 "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "schema = pd.DataFrame.from_records([tab.dtypes.to_dict()])\n",
+ "schema"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The dataframe returned by `bids2table` is in fact a special `BIDSTable` subclass of `pandas.DataFrame` with a few extra helper methods.\n",
+ "\n",
+ "- You can view the table with [nested columns](https://pandas.pydata.org/docs/user_guide/advanced.html#hierarchical-indexing-multiindex)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
"outputs": [
{
"data": {
@@ -412,7 +583,7 @@
" ds | \n",
" ent | \n",
" meta | \n",
- " file | \n",
+ " finfo | \n",
" \n",
" \n",
" | \n",
@@ -461,11 +632,11 @@
"
\n",
" \n",
" 0 | \n",
- " asl002 | \n",
- " raw | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
- " {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... | \n",
- " Sub103 | \n",
+ " ds002 | \n",
+ " None | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'BIDSVersion': '1.0.0', 'License': 'This data... | \n",
+ " 15 | \n",
" None | \n",
" None | \n",
" None | \n",
@@ -497,18 +668,18 @@
" T1w | \n",
" .nii.gz | \n",
" {} | \n",
- " {'Manufacturer': 'Philips', 'ManufacturersMode... | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
" None | \n",
- " 1.687883e+09 | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " None | \n",
+ " 1.691420e+09 | \n",
"
\n",
" \n",
" 1 | \n",
- " asl002 | \n",
- " raw | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
- " {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... | \n",
- " Sub103 | \n",
+ " ds002 | \n",
+ " None | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'BIDSVersion': '1.0.0', 'License': 'This data... | \n",
+ " 15 | \n",
" None | \n",
" None | \n",
" None | \n",
@@ -536,32 +707,32 @@
" None | \n",
" None | \n",
" None | \n",
- " perf | \n",
- " m0scan | \n",
+ " anat | \n",
+ " inplaneT2 | \n",
" .nii.gz | \n",
" {} | \n",
- " {'Manufacturer': 'Philips', 'ManufacturersMode... | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
" None | \n",
- " 1.687883e+09 | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " None | \n",
+ " 1.691420e+09 | \n",
"
\n",
" \n",
" 2 | \n",
- " asl002 | \n",
- " raw | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
- " {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... | \n",
- " Sub103 | \n",
+ " ds002 | \n",
" None | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'BIDSVersion': '1.0.0', 'License': 'This data... | \n",
+ " 15 | \n",
" None | \n",
" None | \n",
+ " probabilisticclassification | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
- " NaN | \n",
+ " 1.0 | \n",
" None | \n",
" NaN | \n",
" NaN | \n",
@@ -579,84 +750,82 @@
" None | \n",
" None | \n",
" None | \n",
- " perf | \n",
- " asl | \n",
+ " func | \n",
+ " bold | \n",
" .nii.gz | \n",
" {} | \n",
- " {'Manufacturer': 'Philips', 'ManufacturersMode... | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
+ " {'RepetitionTime': 2.0, 'TaskName': 'probabili... | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
" None | \n",
- " 1.687883e+09 | \n",
+ " 1.691420e+09 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " ds \n",
+ " ds \\\n",
" dataset dataset_type dataset_path \n",
- "0 asl002 raw /Users/clane/Projects/ScalableQC/code/bids2tab... \\\n",
- "1 asl002 raw /Users/clane/Projects/ScalableQC/code/bids2tab... \n",
- "2 asl002 raw /Users/clane/Projects/ScalableQC/code/bids2tab... \n",
- "\n",
- " ent \n",
- " dataset_description sub ses sample \n",
- "0 {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... Sub103 None None \\\n",
- "1 {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... Sub103 None None \n",
- "2 {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... Sub103 None None \n",
- "\n",
- " \n",
- " task acq ce trc stain rec dir run mod echo flip inv mt \n",
- "0 None None None None None None None NaN None NaN NaN NaN None \\\n",
- "1 None None None None None None None NaN None NaN NaN NaN None \n",
- "2 None None None None None None None NaN None NaN NaN NaN None \n",
- "\n",
- " \n",
- " part proc hemi space split recording chunk atlas res den label desc \n",
- "0 None None None None NaN None NaN None None None None None \\\n",
- "1 None None None None NaN None NaN None None None None None \n",
- "2 None None None None NaN None NaN None None None None None \n",
- "\n",
- " \n",
- " datatype suffix ext extra_entities \n",
- "0 anat T1w .nii.gz {} \\\n",
- "1 perf m0scan .nii.gz {} \n",
- "2 perf asl .nii.gz {} \n",
- "\n",
- " meta \n",
+ "0 ds002 None /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "1 ds002 None /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "2 ds002 None /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "\n",
+ " ent \\\n",
+ " dataset_description sub ses sample \n",
+ "0 {'BIDSVersion': '1.0.0', 'License': 'This data... 15 None None \n",
+ "1 {'BIDSVersion': '1.0.0', 'License': 'This data... 15 None None \n",
+ "2 {'BIDSVersion': '1.0.0', 'License': 'This data... 15 None None \n",
+ "\n",
+ " \\\n",
+ " task acq ce trc stain rec dir run mod \n",
+ "0 None None None None None None None NaN None \n",
+ "1 None None None None None None None NaN None \n",
+ "2 probabilisticclassification None None None None None None 1.0 None \n",
+ "\n",
+ " \\\n",
+ " echo flip inv mt part proc hemi space split recording chunk atlas \n",
+ "0 NaN NaN NaN None None None None None NaN None NaN None \n",
+ "1 NaN NaN NaN None None None None None NaN None NaN None \n",
+ "2 NaN NaN NaN None None None None None NaN None NaN None \n",
+ "\n",
+ " \\\n",
+ " res den label desc datatype suffix ext extra_entities \n",
+ "0 None None None None anat T1w .nii.gz {} \n",
+ "1 None None None None anat inplaneT2 .nii.gz {} \n",
+ "2 None None None None func bold .nii.gz {} \n",
+ "\n",
+ " meta \\\n",
" json \n",
- "0 {'Manufacturer': 'Philips', 'ManufacturersMode... \\\n",
- "1 {'Manufacturer': 'Philips', 'ManufacturersMode... \n",
- "2 {'Manufacturer': 'Philips', 'ManufacturersMode... \n",
+ "0 None \n",
+ "1 None \n",
+ "2 {'RepetitionTime': 2.0, 'TaskName': 'probabili... \n",
"\n",
- " file \n",
+ " finfo \n",
" file_path link_target mod_time \n",
- "0 /Users/clane/Projects/ScalableQC/code/bids2tab... None 1.687883e+09 \n",
- "1 /Users/clane/Projects/ScalableQC/code/bids2tab... None 1.687883e+09 \n",
- "2 /Users/clane/Projects/ScalableQC/code/bids2tab... None 1.687883e+09 "
+ "0 /Users/clane/Projects/B2T/bids2table/bids-exam... None 1.691420e+09 \n",
+ "1 /Users/clane/Projects/B2T/bids2table/bids-exam... None 1.691420e+09 \n",
+ "2 /Users/clane/Projects/B2T/bids2table/bids-exam... None 1.691420e+09 "
]
},
- "execution_count": 6,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df_multi = flat_to_multi_columns(df)\n",
- "\n",
- "df_multi.head(3)"
+ "tab.nested.head(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "This makes it easy to extract just a single group of columns, e.g. the BIDS entities."
+ "- You can easily access the dataset (`ds`), entities (`ent`), metadata (`meta`), or file info (`finfo`) subtables."
]
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -717,7 +886,7 @@
" \n",
" \n",
" 0 | \n",
- " Sub103 | \n",
+ " 15 | \n",
" None | \n",
" None | \n",
" None | \n",
@@ -752,7 +921,7 @@
"
\n",
" \n",
" 1 | \n",
- " Sub103 | \n",
+ " 15 | \n",
" None | \n",
" None | \n",
" None | \n",
@@ -780,24 +949,24 @@
" None | \n",
" None | \n",
" None | \n",
- " perf | \n",
- " m0scan | \n",
+ " anat | \n",
+ " inplaneT2 | \n",
" .nii.gz | \n",
" {} | \n",
"
\n",
" \n",
" 2 | \n",
- " Sub103 | \n",
- " None | \n",
+ " 15 | \n",
" None | \n",
" None | \n",
+ " probabilisticclassification | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
- " NaN | \n",
+ " 1.0 | \n",
" None | \n",
" NaN | \n",
" NaN | \n",
@@ -815,8 +984,8 @@
" None | \n",
" None | \n",
" None | \n",
- " perf | \n",
- " asl | \n",
+ " func | \n",
+ " bold | \n",
" .nii.gz | \n",
" {} | \n",
"
\n",
@@ -825,43 +994,46 @@
""
],
"text/plain": [
- " sub ses sample task acq ce trc stain rec dir run mod \n",
- "0 Sub103 None None None None None None None None None NaN None \\\n",
- "1 Sub103 None None None None None None None None None NaN None \n",
- "2 Sub103 None None None None None None None None None NaN None \n",
+ " sub ses sample task acq ce trc stain rec \\\n",
+ "0 15 None None None None None None None None \n",
+ "1 15 None None None None None None None None \n",
+ "2 15 None None probabilisticclassification None None None None None \n",
"\n",
- " echo flip inv mt part proc hemi space split recording chunk \n",
- "0 NaN NaN NaN None None None None None NaN None NaN \\\n",
- "1 NaN NaN NaN None None None None None NaN None NaN \n",
- "2 NaN NaN NaN None None None None None NaN None NaN \n",
+ " dir run mod echo flip inv mt part proc hemi space split \\\n",
+ "0 None NaN None NaN NaN NaN None None None None None NaN \n",
+ "1 None NaN None NaN NaN NaN None None None None None NaN \n",
+ "2 None 1.0 None NaN NaN NaN None None None None None NaN \n",
"\n",
- " atlas res den label desc datatype suffix ext extra_entities \n",
- "0 None None None None None anat T1w .nii.gz {} \n",
- "1 None None None None None perf m0scan .nii.gz {} \n",
- "2 None None None None None perf asl .nii.gz {} "
+ " recording chunk atlas res den label desc datatype suffix ext \\\n",
+ "0 None NaN None None None None None anat T1w .nii.gz \n",
+ "1 None NaN None None None None None anat inplaneT2 .nii.gz \n",
+ "2 None NaN None None None None None func bold .nii.gz \n",
+ "\n",
+ " extra_entities \n",
+ "0 {} \n",
+ "1 {} \n",
+ "2 {} "
]
},
- "execution_count": 7,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "entities = df_multi[\"ent\"]\n",
- "\n",
- "entities.head(3)"
+ "tab.ent.head(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "We can also drop the first level of the column multi-index for shorter column names."
+ "- You can view the full table without the group prefixes."
]
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -930,11 +1102,11 @@
" \n",
" \n",
" 0 | \n",
- " asl002 | \n",
- " raw | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
- " {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... | \n",
- " Sub103 | \n",
+ " ds002 | \n",
+ " None | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'BIDSVersion': '1.0.0', 'License': 'This data... | \n",
+ " 15 | \n",
" None | \n",
" None | \n",
" None | \n",
@@ -966,18 +1138,18 @@
" T1w | \n",
" .nii.gz | \n",
" {} | \n",
- " {'Manufacturer': 'Philips', 'ManufacturersMode... | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
" None | \n",
- " 1.687883e+09 | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " None | \n",
+ " 1.691420e+09 | \n",
"
\n",
" \n",
" 1 | \n",
- " asl002 | \n",
- " raw | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
- " {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... | \n",
- " Sub103 | \n",
+ " ds002 | \n",
+ " None | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'BIDSVersion': '1.0.0', 'License': 'This data... | \n",
+ " 15 | \n",
" None | \n",
" None | \n",
" None | \n",
@@ -1005,32 +1177,32 @@
" None | \n",
" None | \n",
" None | \n",
- " perf | \n",
- " m0scan | \n",
+ " anat | \n",
+ " inplaneT2 | \n",
" .nii.gz | \n",
" {} | \n",
- " {'Manufacturer': 'Philips', 'ManufacturersMode... | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
" None | \n",
- " 1.687883e+09 | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " None | \n",
+ " 1.691420e+09 | \n",
"
\n",
" \n",
" 2 | \n",
- " asl002 | \n",
- " raw | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
- " {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... | \n",
- " Sub103 | \n",
+ " ds002 | \n",
" None | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'BIDSVersion': '1.0.0', 'License': 'This data... | \n",
+ " 15 | \n",
" None | \n",
" None | \n",
+ " probabilisticclassification | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
- " NaN | \n",
+ " 1.0 | \n",
" None | \n",
" NaN | \n",
" NaN | \n",
@@ -1048,161 +1220,987 @@
" None | \n",
" None | \n",
" None | \n",
- " perf | \n",
- " asl | \n",
+ " func | \n",
+ " bold | \n",
" .nii.gz | \n",
" {} | \n",
- " {'Manufacturer': 'Philips', 'ManufacturersMode... | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
+ " {'RepetitionTime': 2.0, 'TaskName': 'probabili... | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
" None | \n",
- " 1.687883e+09 | \n",
+ " 1.691420e+09 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " dataset dataset_type dataset_path \n",
- "0 asl002 raw /Users/clane/Projects/ScalableQC/code/bids2tab... \\\n",
- "1 asl002 raw /Users/clane/Projects/ScalableQC/code/bids2tab... \n",
- "2 asl002 raw /Users/clane/Projects/ScalableQC/code/bids2tab... \n",
- "\n",
- " dataset_description sub ses sample \n",
- "0 {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... Sub103 None None \\\n",
- "1 {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... Sub103 None None \n",
- "2 {'Name': 'ASL_Philips_PCASL_2DEPI', 'BIDSVersi... Sub103 None None \n",
- "\n",
- " task acq ce trc stain rec dir run mod echo flip inv mt \n",
- "0 None None None None None None None NaN None NaN NaN NaN None \\\n",
- "1 None None None None None None None NaN None NaN NaN NaN None \n",
- "2 None None None None None None None NaN None NaN NaN NaN None \n",
- "\n",
- " part proc hemi space split recording chunk atlas res den label \n",
- "0 None None None None NaN None NaN None None None None \\\n",
- "1 None None None None NaN None NaN None None None None \n",
- "2 None None None None NaN None NaN None None None None \n",
- "\n",
- " desc datatype suffix ext extra_entities \n",
- "0 None anat T1w .nii.gz {} \\\n",
- "1 None perf m0scan .nii.gz {} \n",
- "2 None perf asl .nii.gz {} \n",
+ " dataset dataset_type dataset_path \\\n",
+ "0 ds002 None /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "1 ds002 None /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "2 ds002 None /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "\n",
+ " dataset_description sub ses sample \\\n",
+ "0 {'BIDSVersion': '1.0.0', 'License': 'This data... 15 None None \n",
+ "1 {'BIDSVersion': '1.0.0', 'License': 'This data... 15 None None \n",
+ "2 {'BIDSVersion': '1.0.0', 'License': 'This data... 15 None None \n",
+ "\n",
+ " task acq ce trc stain rec dir run mod \\\n",
+ "0 None None None None None None None NaN None \n",
+ "1 None None None None None None None NaN None \n",
+ "2 probabilisticclassification None None None None None None 1.0 None \n",
+ "\n",
+ " echo flip inv mt part proc hemi space split recording chunk \\\n",
+ "0 NaN NaN NaN None None None None None NaN None NaN \n",
+ "1 NaN NaN NaN None None None None None NaN None NaN \n",
+ "2 NaN NaN NaN None None None None None NaN None NaN \n",
+ "\n",
+ " atlas res den label desc datatype suffix ext extra_entities \\\n",
+ "0 None None None None None anat T1w .nii.gz {} \n",
+ "1 None None None None None anat inplaneT2 .nii.gz {} \n",
+ "2 None None None None None func bold .nii.gz {} \n",
+ "\n",
+ " json \\\n",
+ "0 None \n",
+ "1 None \n",
+ "2 {'RepetitionTime': 2.0, 'TaskName': 'probabili... \n",
+ "\n",
+ " file_path link_target mod_time \n",
+ "0 /Users/clane/Projects/B2T/bids2table/bids-exam... None 1.691420e+09 \n",
+ "1 /Users/clane/Projects/B2T/bids2table/bids-exam... None 1.691420e+09 \n",
+ "2 /Users/clane/Projects/B2T/bids2table/bids-exam... None 1.691420e+09 "
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tab.flat.head(3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- You can access flattened JSON metadata."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " RepetitionTime | \n",
+ " TaskName | \n",
+ " InstitutionAddress | \n",
+ " InstitutionName | \n",
+ " InstitutionalDepartmentName | \n",
+ " PowerLineFrequency | \n",
+ " ManufacturersModelName | \n",
+ " EEGReference | \n",
+ " Manufacturer | \n",
+ " EEGChannelCount | \n",
+ " MiscChannelCount | \n",
+ " RecordingType | \n",
+ " RecordingDuration | \n",
+ " SamplingFrequency | \n",
+ " EOGChannelCount | \n",
+ " ECGChannelCount | \n",
+ " EMGChannelCount | \n",
+ " SoftwareFilters | \n",
+ " onset | \n",
+ " duration | \n",
+ " trial_type | \n",
+ " response_time | \n",
+ " sample | \n",
+ " value | \n",
+ " SoftwareVersions | \n",
+ " MagneticFieldStrength | \n",
+ " ReceiveCoilName | \n",
+ " ReceiveCoilActiveElements | \n",
+ " ScanningSequence | \n",
+ " SequenceVariant | \n",
+ " ScanOptions | \n",
+ " SequenceName | \n",
+ " PulseSequenceDetails | \n",
+ " ParallelReductionFactorInPlane | \n",
+ " PartialFourier | \n",
+ " EchoTime | \n",
+ " InversionTime | \n",
+ " DwellTime | \n",
+ " FlipAngle | \n",
+ " MRAcquisitionType | \n",
+ " PulseSequenceType | \n",
+ " PhaseEncodingDirection | \n",
+ " EffectiveEchoSpacing | \n",
+ " TotalReadoutTime | \n",
+ " RepetitionTimePreparation | \n",
+ " IntendedFor | \n",
+ " AcquisitionVoxelsize | \n",
+ " NumberShots | \n",
+ " ArterialSpinLabelingType | \n",
+ " PostLabelingDelay | \n",
+ " ... | \n",
+ " SpoilingRFPhaseIncrement | \n",
+ " MagneticFliedStrength | \n",
+ " PulseSequence | \n",
+ " SpoilingState | \n",
+ " SpoilingType | \n",
+ " SpoilingGradientMoment | \n",
+ " SpoilingGradientDuration | \n",
+ " a_comp_cor_179 | \n",
+ " a_comp_cor_180 | \n",
+ " a_comp_cor_181 | \n",
+ " a_comp_cor_182 | \n",
+ " a_comp_cor_183 | \n",
+ " t_comp_cor_06 | \n",
+ " a_comp_cor_184 | \n",
+ " a_comp_cor_185 | \n",
+ " a_comp_cor_186 | \n",
+ " a_comp_cor_187 | \n",
+ " a_comp_cor_188 | \n",
+ " aroma_motion_45 | \n",
+ " aroma_motion_46 | \n",
+ " aroma_motion_47 | \n",
+ " aroma_motion_48 | \n",
+ " aroma_motion_49 | \n",
+ " aroma_motion_50 | \n",
+ " aroma_motion_51 | \n",
+ " aroma_motion_52 | \n",
+ " aroma_motion_53 | \n",
+ " dropped_568 | \n",
+ " dropped_569 | \n",
+ " dropped_570 | \n",
+ " dropped_571 | \n",
+ " PharmaceuticalName | \n",
+ " PharmaceuticalDoseAmount | \n",
+ " PharmaceuticalDoseAmountUnits | \n",
+ " PharmaceuticalDoseRegimen | \n",
+ " PharmaceuticalDoseTime | \n",
+ " InfusionRadioactivity | \n",
+ " InfusionStart | \n",
+ " InfusionSpeed | \n",
+ " InfusionSpeedUnits | \n",
+ " InjectedVolume | \n",
+ " TracerInjectionType | \n",
+ " InjectionEnd | \n",
+ " AttenuationCorrectionMethodReference | \n",
+ " NonLinearGradientCorrection | \n",
+ " PhaseOversampling | \n",
+ " PercentSampling | \n",
+ " InjectedMassPerWeight | \n",
+ " InjectedMassPerWeightUnits | \n",
+ " ElectricalStimulationParameters | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2.0 | \n",
+ " probabilistic classification | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
3 rows × 1177 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " RepetitionTime TaskName InstitutionAddress \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 2.0 probabilistic classification NaN \n",
+ "\n",
+ " InstitutionName InstitutionalDepartmentName PowerLineFrequency \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "\n",
+ " ManufacturersModelName EEGReference Manufacturer EEGChannelCount \\\n",
+ "0 NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN \n",
+ "\n",
+ " MiscChannelCount RecordingType RecordingDuration SamplingFrequency \\\n",
+ "0 NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN \n",
+ "\n",
+ " EOGChannelCount ECGChannelCount EMGChannelCount SoftwareFilters onset \\\n",
+ "0 NaN NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN NaN \n",
+ "\n",
+ " duration trial_type response_time sample value SoftwareVersions \\\n",
+ "0 NaN NaN NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN NaN NaN \n",
+ "\n",
+ " MagneticFieldStrength ReceiveCoilName ReceiveCoilActiveElements \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "\n",
+ " ScanningSequence SequenceVariant ScanOptions SequenceName \\\n",
+ "0 NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN \n",
+ "\n",
+ " PulseSequenceDetails ParallelReductionFactorInPlane PartialFourier \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "\n",
+ " EchoTime InversionTime DwellTime FlipAngle MRAcquisitionType \\\n",
+ "0 NaN NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN NaN \n",
+ "\n",
+ " PulseSequenceType PhaseEncodingDirection EffectiveEchoSpacing \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "\n",
+ " TotalReadoutTime RepetitionTimePreparation IntendedFor \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "\n",
+ " AcquisitionVoxelsize NumberShots ArterialSpinLabelingType \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "\n",
+ " PostLabelingDelay ... SpoilingRFPhaseIncrement MagneticFliedStrength \\\n",
+ "0 NaN ... NaN NaN \n",
+ "1 NaN ... NaN NaN \n",
+ "2 NaN ... NaN NaN \n",
+ "\n",
+ " PulseSequence SpoilingState SpoilingType SpoilingGradientMoment \\\n",
+ "0 NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN \n",
+ "\n",
+ " SpoilingGradientDuration a_comp_cor_179 a_comp_cor_180 a_comp_cor_181 \\\n",
+ "0 NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN \n",
+ "\n",
+ " a_comp_cor_182 a_comp_cor_183 t_comp_cor_06 a_comp_cor_184 a_comp_cor_185 \\\n",
+ "0 NaN NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN NaN \n",
+ "\n",
+ " a_comp_cor_186 a_comp_cor_187 a_comp_cor_188 aroma_motion_45 \\\n",
+ "0 NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN \n",
+ "\n",
+ " aroma_motion_46 aroma_motion_47 aroma_motion_48 aroma_motion_49 \\\n",
+ "0 NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN \n",
+ "\n",
+ " aroma_motion_50 aroma_motion_51 aroma_motion_52 aroma_motion_53 dropped_568 \\\n",
+ "0 NaN NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN NaN \n",
+ "\n",
+ " dropped_569 dropped_570 dropped_571 PharmaceuticalName \\\n",
+ "0 NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN \n",
+ "\n",
+ " PharmaceuticalDoseAmount PharmaceuticalDoseAmountUnits \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "\n",
+ " PharmaceuticalDoseRegimen PharmaceuticalDoseTime InfusionRadioactivity \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "\n",
+ " InfusionStart InfusionSpeed InfusionSpeedUnits InjectedVolume \\\n",
+ "0 NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN \n",
+ "\n",
+ " TracerInjectionType InjectionEnd AttenuationCorrectionMethodReference \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "\n",
+ " NonLinearGradientCorrection PhaseOversampling PercentSampling \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "\n",
+ " InjectedMassPerWeight InjectedMassPerWeightUnits \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "\n",
+ " ElectricalStimulationParameters \n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "\n",
+ "[3 rows x 1177 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tab.flat_meta.head(3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- You can still slice the table and get back a `BIDSTable`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "<class 'bids2table.table.BIDSTable'> (500, 40)\n"
+ ]
+ }
+ ],
+ "source": [
+ "subtab = tab.iloc[:500]\n",
+ "print(type(subtab), subtab.shape)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Sorting rows\n",
+ "\n",
+ "By default the rows are in arbitrary order. We can sort the rows by dataset, subject, session, task, and run."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ds__dataset | \n",
+ " ds__dataset_type | \n",
+ " ds__dataset_path | \n",
+ " ds__dataset_description | \n",
+ " ent__sub | \n",
+ " ent__ses | \n",
+ " ent__sample | \n",
+ " ent__task | \n",
+ " ent__acq | \n",
+ " ent__ce | \n",
+ " ent__trc | \n",
+ " ent__stain | \n",
+ " ent__rec | \n",
+ " ent__dir | \n",
+ " ent__run | \n",
+ " ent__mod | \n",
+ " ent__echo | \n",
+ " ent__flip | \n",
+ " ent__inv | \n",
+ " ent__mt | \n",
+ " ent__part | \n",
+ " ent__proc | \n",
+ " ent__hemi | \n",
+ " ent__space | \n",
+ " ent__split | \n",
+ " ent__recording | \n",
+ " ent__chunk | \n",
+ " ent__atlas | \n",
+ " ent__res | \n",
+ " ent__den | \n",
+ " ent__label | \n",
+ " ent__desc | \n",
+ " ent__datatype | \n",
+ " ent__suffix | \n",
+ " ent__ext | \n",
+ " ent__extra_entities | \n",
+ " meta__json | \n",
+ " finfo__file_path | \n",
+ " finfo__link_target | \n",
+ " finfo__mod_time | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3788 | \n",
+ " 7t_trt | \n",
+ " None | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'BIDSVersion': '1.8.0', 'Name': '7t_trt'} | \n",
+ " 01 | \n",
+ " 1 | \n",
+ " None | \n",
+ " rest | \n",
+ " fullbrain | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 1.0 | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " func | \n",
+ " bold | \n",
+ " .nii.gz | \n",
+ " {} | \n",
+ " {'CogAtlasID': 'https://www.cognitiveatlas.org... | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " None | \n",
+ " 1.691420e+09 | \n",
+ "
\n",
+ " \n",
+ " 3790 | \n",
+ " 7t_trt | \n",
+ " None | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'BIDSVersion': '1.8.0', 'Name': '7t_trt'} | \n",
+ " 01 | \n",
+ " 1 | \n",
+ " None | \n",
+ " rest | \n",
+ " fullbrain | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 1.0 | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " func | \n",
+ " physio | \n",
+ " .tsv.gz | \n",
+ " {} | \n",
+ " {'StartTime': 0, 'SamplingFrequency': 100, 'Co... | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " None | \n",
+ " 1.691420e+09 | \n",
+ "
\n",
+ " \n",
+ " 3786 | \n",
+ " 7t_trt | \n",
+ " None | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'BIDSVersion': '1.8.0', 'Name': '7t_trt'} | \n",
+ " 01 | \n",
+ " 1 | \n",
+ " None | \n",
+ " rest | \n",
+ " fullbrain | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 2.0 | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " func | \n",
+ " bold | \n",
+ " .nii.gz | \n",
+ " {} | \n",
+ " {'CogAtlasID': 'https://www.cognitiveatlas.org... | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " None | \n",
+ " 1.691420e+09 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ds__dataset ds__dataset_type \\\n",
+ "3788 7t_trt None \n",
+ "3790 7t_trt None \n",
+ "3786 7t_trt None \n",
"\n",
- " json \n",
- "0 {'Manufacturer': 'Philips', 'ManufacturersMode... \\\n",
- "1 {'Manufacturer': 'Philips', 'ManufacturersMode... \n",
- "2 {'Manufacturer': 'Philips', 'ManufacturersMode... \n",
+ " ds__dataset_path \\\n",
+ "3788 /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "3790 /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "3786 /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
"\n",
- " file_path link_target mod_time \n",
- "0 /Users/clane/Projects/ScalableQC/code/bids2tab... None 1.687883e+09 \n",
- "1 /Users/clane/Projects/ScalableQC/code/bids2tab... None 1.687883e+09 \n",
- "2 /Users/clane/Projects/ScalableQC/code/bids2tab... None 1.687883e+09 "
+ " ds__dataset_description ent__sub ent__ses \\\n",
+ "3788 {'BIDSVersion': '1.8.0', 'Name': '7t_trt'} 01 1 \n",
+ "3790 {'BIDSVersion': '1.8.0', 'Name': '7t_trt'} 01 1 \n",
+ "3786 {'BIDSVersion': '1.8.0', 'Name': '7t_trt'} 01 1 \n",
+ "\n",
+ " ent__sample ent__task ent__acq ent__ce ent__trc ent__stain ent__rec \\\n",
+ "3788 None rest fullbrain None None None None \n",
+ "3790 None rest fullbrain None None None None \n",
+ "3786 None rest fullbrain None None None None \n",
+ "\n",
+ " ent__dir ent__run ent__mod ent__echo ent__flip ent__inv ent__mt \\\n",
+ "3788 None 1.0 None NaN NaN NaN None \n",
+ "3790 None 1.0 None NaN NaN NaN None \n",
+ "3786 None 2.0 None NaN NaN NaN None \n",
+ "\n",
+ " ent__part ent__proc ent__hemi ent__space ent__split ent__recording \\\n",
+ "3788 None None None None NaN None \n",
+ "3790 None None None None NaN None \n",
+ "3786 None None None None NaN None \n",
+ "\n",
+ " ent__chunk ent__atlas ent__res ent__den ent__label ent__desc \\\n",
+ "3788 NaN None None None None None \n",
+ "3790 NaN None None None None None \n",
+ "3786 NaN None None None None None \n",
+ "\n",
+ " ent__datatype ent__suffix ent__ext ent__extra_entities \\\n",
+ "3788 func bold .nii.gz {} \n",
+ "3790 func physio .tsv.gz {} \n",
+ "3786 func bold .nii.gz {} \n",
+ "\n",
+ " meta__json \\\n",
+ "3788 {'CogAtlasID': 'https://www.cognitiveatlas.org... \n",
+ "3790 {'StartTime': 0, 'SamplingFrequency': 100, 'Co... \n",
+ "3786 {'CogAtlasID': 'https://www.cognitiveatlas.org... \n",
+ "\n",
+ " finfo__file_path finfo__link_target \\\n",
+ "3788 /Users/clane/Projects/B2T/bids2table/bids-exam... None \n",
+ "3790 /Users/clane/Projects/B2T/bids2table/bids-exam... None \n",
+ "3786 /Users/clane/Projects/B2T/bids2table/bids-exam... None \n",
+ "\n",
+ " finfo__mod_time \n",
+ "3788 1.691420e+09 \n",
+ "3790 1.691420e+09 \n",
+ "3786 1.691420e+09 "
]
},
- "execution_count": 8,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df_drop = df_multi.droplevel(0, axis=1)\n",
- "\n",
- "df_drop.head(3)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Columns and types\n",
- "\n",
- "Now let's look at the column names and pandas types.\n",
- "\n",
- "> TODO: not all types are preserved when converting parquet to pandas. In particular, strings are mapped to objects and ints with `None` to float with `NaN`."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Shape: (10266, 40)\n",
- "Columns:\n",
- " ds__dataset: object\n",
- " ds__dataset_type: object\n",
- " ds__dataset_path: object\n",
- " ds__dataset_description: json\n",
- " ent__sub: object\n",
- " ent__ses: object\n",
- " ent__sample: object\n",
- " ent__task: object\n",
- " ent__acq: object\n",
- " ent__ce: object\n",
- " ent__trc: object\n",
- " ent__stain: object\n",
- " ent__rec: object\n",
- " ent__dir: object\n",
- " ent__run: float64\n",
- " ent__mod: object\n",
- " ent__echo: float64\n",
- " ent__flip: float64\n",
- " ent__inv: float64\n",
- " ent__mt: object\n",
- " ent__part: object\n",
- " ent__proc: object\n",
- " ent__hemi: object\n",
- " ent__space: object\n",
- " ent__split: float64\n",
- " ent__recording: object\n",
- " ent__chunk: float64\n",
- " ent__atlas: object\n",
- " ent__res: object\n",
- " ent__den: object\n",
- " ent__label: object\n",
- " ent__desc: object\n",
- " ent__datatype: object\n",
- " ent__suffix: object\n",
- " ent__ext: object\n",
- " ent__extra_entities: json\n",
- " meta__json: json\n",
- " file__file_path: object\n",
- " file__link_target: object\n",
- " file__mod_time: float64\n"
- ]
- }
- ],
- "source": [
- "print(f\"Shape: \", df.shape)\n",
- "print(\n",
- " \"Columns:\\n\"\n",
- " + \"\\n\".join(f\" {name}: {typ}\" for name, typ in df.dtypes.to_dict().items())\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Most columns are numeric (`float64`) or string (`object`) type. However there are some columns (`ds__dataset_description`, `ent__extra_entities`, `meta__json`) which use the elbow extension `json` type for arbitrary nested dicts."
+ "sort_tab = tab.sort_entities([\"dataset\", \"sub\", \"ses\", \"task\", \"run\"])\n",
+ "sort_tab.head(3)"
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Sorting rows\n",
+ "### Filtering\n",
"\n",
- "By default the rows are in arbitrary order. We can sort the values in place."
+ "In addition to all the usual pandas slicing operations, `BIDSTable`s also support higher-level filtering operations inspired by the PyBIDS `BIDSLayout.get` method and the pandas `Series.filter` method."
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -1263,72 +2261,115 @@
" ent__ext | \n",
" ent__extra_entities | \n",
" meta__json | \n",
- " file__file_path | \n",
- " file__link_target | \n",
- " file__mod_time | \n",
+ " finfo__file_path | \n",
+ " finfo__link_target | \n",
+ " finfo__mod_time | \n",
" \n",
" \n",
" \n",
" \n",
- " 9284 | \n",
- " 7t_trt | \n",
- " None | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
- " {'BIDSVersion': '1.8.0', 'Name': '7t_trt'} | \n",
- " 01 | \n",
- " 1 | \n",
+ " 1554 | \n",
+ " synthetic/derivatives/fmriprep | \n",
+ " derivative | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'Name': 'fMRIPrep - fMRI PREProcessing workfl... | \n",
+ " 04 | \n",
+ " 02 | \n",
" None | \n",
" rest | \n",
- " fullbrain | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
- " 1.0 | \n",
" None | \n",
" NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " T1w | \n",
" NaN | \n",
+ " None | \n",
" NaN | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
+ " preproc | \n",
+ " func | \n",
+ " bold | \n",
+ " .nii | \n",
+ " {} | \n",
+ " {'Sources': ['bids:raw:sub-04/ses-02/sub-04_se... | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " None | \n",
+ " 1.691420e+09 | \n",
+ "
\n",
+ " \n",
+ " 1567 | \n",
+ " synthetic/derivatives/fmriprep | \n",
+ " derivative | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'Name': 'fMRIPrep - fMRI PREProcessing workfl... | \n",
+ " 04 | \n",
+ " 02 | \n",
+ " None | \n",
+ " rest | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " NaN | \n",
" None | \n",
" NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " None | \n",
" None | \n",
+ " None | \n",
+ " MNI152NLin2009cAsym | \n",
" NaN | \n",
" None | \n",
+ " NaN | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
+ " preproc | \n",
" func | \n",
" bold | \n",
- " .nii.gz | \n",
+ " .nii | \n",
" {} | \n",
- " {'CogAtlasID': 'https://www.cognitiveatlas.org... | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
+ " {'Sources': ['bids:raw:sub-04/ses-02/sub-04_se... | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
" None | \n",
- " 1.687883e+09 | \n",
+ " 1.691420e+09 | \n",
"
\n",
" \n",
- " 9286 | \n",
- " 7t_trt | \n",
- " None | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
- " {'BIDSVersion': '1.8.0', 'Name': '7t_trt'} | \n",
+ " 1576 | \n",
+ " synthetic/derivatives/fmriprep | \n",
+ " derivative | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'Name': 'fMRIPrep - fMRI PREProcessing workfl... | \n",
+ " 04 | \n",
" 01 | \n",
- " 1 | \n",
" None | \n",
" rest | \n",
- " fullbrain | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
- " 1.0 | \n",
+ " None | \n",
+ " NaN | \n",
" None | \n",
" NaN | \n",
" NaN | \n",
@@ -1337,41 +2378,127 @@
" None | \n",
" None | \n",
" None | \n",
+ " T1w | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " preproc | \n",
+ " func | \n",
+ " bold | \n",
+ " .nii | \n",
+ " {} | \n",
+ " {'Sources': ['bids:raw:sub-04/ses-01/sub-04_se... | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " None | \n",
+ " 1.691420e+09 | \n",
+ "
\n",
+ " \n",
+ " 1579 | \n",
+ " synthetic/derivatives/fmriprep | \n",
+ " derivative | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'Name': 'fMRIPrep - fMRI PREProcessing workfl... | \n",
+ " 04 | \n",
+ " 01 | \n",
+ " None | \n",
+ " rest | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " NaN | \n",
" None | \n",
" NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " None | \n",
" None | \n",
+ " None | \n",
+ " MNI152NLin2009cAsym | \n",
" NaN | \n",
" None | \n",
+ " NaN | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
+ " preproc | \n",
" func | \n",
- " physio | \n",
- " .tsv.gz | \n",
+ " bold | \n",
+ " .nii | \n",
" {} | \n",
- " {'StartTime': 0, 'SamplingFrequency': 100, 'Co... | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
+ " {'Sources': ['bids:raw:sub-04/ses-01/sub-04_se... | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
" None | \n",
- " 1.687883e+09 | \n",
+ " 1.691420e+09 | \n",
"
\n",
" \n",
- " 9282 | \n",
- " 7t_trt | \n",
+ " 4222 | \n",
+ " synthetic | \n",
+ " raw | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'Name': 'Synthetic dataset for inclusion in B... | \n",
+ " 04 | \n",
+ " 02 | \n",
" None | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
- " {'BIDSVersion': '1.8.0', 'Name': '7t_trt'} | \n",
+ " rest | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " func | \n",
+ " bold | \n",
+ " .nii | \n",
+ " {} | \n",
+ " {'TaskName': 'Rest', 'RepetitionTime': 2.5} | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " None | \n",
+ " 1.691420e+09 | \n",
+ "
\n",
+ " \n",
+ " 4235 | \n",
+ " synthetic | \n",
+ " raw | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
+ " {'Name': 'Synthetic dataset for inclusion in B... | \n",
+ " 04 | \n",
" 01 | \n",
- " 1 | \n",
" None | \n",
" rest | \n",
- " fullbrain | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
" None | \n",
- " 2.0 | \n",
+ " None | \n",
+ " NaN | \n",
" None | \n",
" NaN | \n",
" NaN | \n",
@@ -1391,85 +2518,211 @@
" None | \n",
" func | \n",
" bold | \n",
- " .nii.gz | \n",
+ " .nii | \n",
" {} | \n",
- " {'CogAtlasID': 'https://www.cognitiveatlas.org... | \n",
- " /Users/clane/Projects/ScalableQC/code/bids2tab... | \n",
+ " {'TaskName': 'Rest', 'RepetitionTime': 2.5} | \n",
+ " /Users/clane/Projects/B2T/bids2table/bids-exam... | \n",
" None | \n",
- " 1.687883e+09 | \n",
+ " 1.691420e+09 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " ds__dataset ds__dataset_type \n",
- "9284 7t_trt None \\\n",
- "9286 7t_trt None \n",
- "9282 7t_trt None \n",
- "\n",
- " ds__dataset_path \n",
- "9284 /Users/clane/Projects/ScalableQC/code/bids2tab... \\\n",
- "9286 /Users/clane/Projects/ScalableQC/code/bids2tab... \n",
- "9282 /Users/clane/Projects/ScalableQC/code/bids2tab... \n",
- "\n",
- " ds__dataset_description ent__sub ent__ses \n",
- "9284 {'BIDSVersion': '1.8.0', 'Name': '7t_trt'} 01 1 \\\n",
- "9286 {'BIDSVersion': '1.8.0', 'Name': '7t_trt'} 01 1 \n",
- "9282 {'BIDSVersion': '1.8.0', 'Name': '7t_trt'} 01 1 \n",
- "\n",
- " ent__sample ent__task ent__acq ent__ce ent__trc ent__stain ent__rec \n",
- "9284 None rest fullbrain None None None None \\\n",
- "9286 None rest fullbrain None None None None \n",
- "9282 None rest fullbrain None None None None \n",
- "\n",
- " ent__dir ent__run ent__mod ent__echo ent__flip ent__inv ent__mt \n",
- "9284 None 1.0 None NaN NaN NaN None \\\n",
- "9286 None 1.0 None NaN NaN NaN None \n",
- "9282 None 2.0 None NaN NaN NaN None \n",
- "\n",
- " ent__part ent__proc ent__hemi ent__space ent__split ent__recording \n",
- "9284 None None None None NaN None \\\n",
- "9286 None None None None NaN None \n",
- "9282 None None None None NaN None \n",
- "\n",
- " ent__chunk ent__atlas ent__res ent__den ent__label ent__desc \n",
- "9284 NaN None None None None None \\\n",
- "9286 NaN None None None None None \n",
- "9282 NaN None None None None None \n",
- "\n",
- " ent__datatype ent__suffix ent__ext ent__extra_entities \n",
- "9284 func bold .nii.gz {} \\\n",
- "9286 func physio .tsv.gz {} \n",
- "9282 func bold .nii.gz {} \n",
- "\n",
- " meta__json \n",
- "9284 {'CogAtlasID': 'https://www.cognitiveatlas.org... \\\n",
- "9286 {'StartTime': 0, 'SamplingFrequency': 100, 'Co... \n",
- "9282 {'CogAtlasID': 'https://www.cognitiveatlas.org... \n",
- "\n",
- " file__file_path file__link_target \n",
- "9284 /Users/clane/Projects/ScalableQC/code/bids2tab... None \\\n",
- "9286 /Users/clane/Projects/ScalableQC/code/bids2tab... None \n",
- "9282 /Users/clane/Projects/ScalableQC/code/bids2tab... None \n",
- "\n",
- " file__mod_time \n",
- "9284 1.687883e+09 \n",
- "9286 1.687883e+09 \n",
- "9282 1.687883e+09 "
+ " ds__dataset ds__dataset_type \\\n",
+ "1554 synthetic/derivatives/fmriprep derivative \n",
+ "1567 synthetic/derivatives/fmriprep derivative \n",
+ "1576 synthetic/derivatives/fmriprep derivative \n",
+ "1579 synthetic/derivatives/fmriprep derivative \n",
+ "4222 synthetic raw \n",
+ "4235 synthetic raw \n",
+ "\n",
+ " ds__dataset_path \\\n",
+ "1554 /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "1567 /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "1576 /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "1579 /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "4222 /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "4235 /Users/clane/Projects/B2T/bids2table/bids-exam... \n",
+ "\n",
+ " ds__dataset_description ent__sub ent__ses \\\n",
+ "1554 {'Name': 'fMRIPrep - fMRI PREProcessing workfl... 04 02 \n",
+ "1567 {'Name': 'fMRIPrep - fMRI PREProcessing workfl... 04 02 \n",
+ "1576 {'Name': 'fMRIPrep - fMRI PREProcessing workfl... 04 01 \n",
+ "1579 {'Name': 'fMRIPrep - fMRI PREProcessing workfl... 04 01 \n",
+ "4222 {'Name': 'Synthetic dataset for inclusion in B... 04 02 \n",
+ "4235 {'Name': 'Synthetic dataset for inclusion in B... 04 01 \n",
+ "\n",
+ " ent__sample ent__task ent__acq ent__ce ent__trc ent__stain ent__rec \\\n",
+ "1554 None rest None None None None None \n",
+ "1567 None rest None None None None None \n",
+ "1576 None rest None None None None None \n",
+ "1579 None rest None None None None None \n",
+ "4222 None rest None None None None None \n",
+ "4235 None rest None None None None None \n",
+ "\n",
+ " ent__dir ent__run ent__mod ent__echo ent__flip ent__inv ent__mt \\\n",
+ "1554 None NaN None NaN NaN NaN None \n",
+ "1567 None NaN None NaN NaN NaN None \n",
+ "1576 None NaN None NaN NaN NaN None \n",
+ "1579 None NaN None NaN NaN NaN None \n",
+ "4222 None NaN None NaN NaN NaN None \n",
+ "4235 None NaN None NaN NaN NaN None \n",
+ "\n",
+ " ent__part ent__proc ent__hemi ent__space ent__split \\\n",
+ "1554 None None None T1w NaN \n",
+ "1567 None None None MNI152NLin2009cAsym NaN \n",
+ "1576 None None None T1w NaN \n",
+ "1579 None None None MNI152NLin2009cAsym NaN \n",
+ "4222 None None None None NaN \n",
+ "4235 None None None None NaN \n",
+ "\n",
+ " ent__recording ent__chunk ent__atlas ent__res ent__den ent__label \\\n",
+ "1554 None NaN None None None None \n",
+ "1567 None NaN None None None None \n",
+ "1576 None NaN None None None None \n",
+ "1579 None NaN None None None None \n",
+ "4222 None NaN None None None None \n",
+ "4235 None NaN None None None None \n",
+ "\n",
+ " ent__desc ent__datatype ent__suffix ent__ext ent__extra_entities \\\n",
+ "1554 preproc func bold .nii {} \n",
+ "1567 preproc func bold .nii {} \n",
+ "1576 preproc func bold .nii {} \n",
+ "1579 preproc func bold .nii {} \n",
+ "4222 None func bold .nii {} \n",
+ "4235 None func bold .nii {} \n",
+ "\n",
+ " meta__json \\\n",
+ "1554 {'Sources': ['bids:raw:sub-04/ses-02/sub-04_se... \n",
+ "1567 {'Sources': ['bids:raw:sub-04/ses-02/sub-04_se... \n",
+ "1576 {'Sources': ['bids:raw:sub-04/ses-01/sub-04_se... \n",
+ "1579 {'Sources': ['bids:raw:sub-04/ses-01/sub-04_se... \n",
+ "4222 {'TaskName': 'Rest', 'RepetitionTime': 2.5} \n",
+ "4235 {'TaskName': 'Rest', 'RepetitionTime': 2.5} \n",
+ "\n",
+ " finfo__file_path finfo__link_target \\\n",
+ "1554 /Users/clane/Projects/B2T/bids2table/bids-exam... None \n",
+ "1567 /Users/clane/Projects/B2T/bids2table/bids-exam... None \n",
+ "1576 /Users/clane/Projects/B2T/bids2table/bids-exam... None \n",
+ "1579 /Users/clane/Projects/B2T/bids2table/bids-exam... None \n",
+ "4222 /Users/clane/Projects/B2T/bids2table/bids-exam... None \n",
+ "4235 /Users/clane/Projects/B2T/bids2table/bids-exam... None \n",
+ "\n",
+ " finfo__mod_time \n",
+ "1554 1.691420e+09 \n",
+ "1567 1.691420e+09 \n",
+ "1576 1.691420e+09 \n",
+ "1579 1.691420e+09 \n",
+ "4222 1.691420e+09 \n",
+ "4235 1.691420e+09 "
]
},
- "execution_count": 10,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "sort_cols = [\"ds__dataset\"] + [f\"ent__{k}\" for k in [\"sub\", \"ses\", \"task\", \"run\"]]\n",
+ "filtered = (\n",
+ " tab\n",
+ " .filter(\"task\", contains=\"rest\")\n",
+ " .filter(\"sub\", items=[\"04\", \"08\"])\n",
+ " .filter(\"RepetitionTime\", 2.5)\n",
+ ")\n",
+ "\n",
+ "filtered"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can also apply multiple filters at the same time with `filter_multi`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Filters equal: True\n"
+ ]
+ }
+ ],
+ "source": [
+ "filtered2 = tab.filter_multi(\n",
+ " task={\"contains\": \"rest\"},\n",
+ " sub={\"items\": [\"04\", \"08\"]},\n",
+ " RepetitionTime=2.5,\n",
+ ")\n",
+ "\n",
+ "print(\"Filters equal:\", filtered.equals(filtered2))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Getting files\n",
+ "\n",
+ "The rows of the table can also be converted to a list of structured `BIDSFile`s."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "First file: BIDSFile(dataset='synthetic/derivatives/fmriprep', root=PosixPath('/Users/clane/Projects/B2T/bids2table/bids-examples/synthetic/derivatives/fmriprep'), path=PosixPath('/Users/clane/Projects/B2T/bids2table/bids-examples/synthetic/derivatives/fmriprep/sub-04/ses-02/func/sub-04_ses-02_task-rest_space-T1w_desc-preproc_bold.nii'), entities=BIDSEntities(sub='04', ses='02', sample=None, task='rest', acq=None, ce=None, trc=None, stain=None, rec=None, dir=None, run=None, mod=None, echo=None, flip=None, inv=None, mt=None, part=None, proc=None, hemi=None, space='T1w', split=None, recording=None, chunk=None, atlas=None, res=None, den=None, label=None, desc='preproc', datatype='func', suffix='bold', ext='.nii', extra_entities={}), metadata={'Sources': ['bids:raw:sub-04/ses-02/sub-04_ses-02_task-rest_bold.nii'], 'TaskName': 'Rest', 'RepetitionTime': 2.5})\n"
+ ]
+ }
+ ],
+ "source": [
+ "files = filtered.files\n",
"\n",
- "df.sort_values(sort_cols, inplace=True)\n",
+ "print(\"First file:\", files[0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "File paths:\n",
+ "sub-04/ses-02/func/sub-04_ses-02_task-rest_space-T1w_desc-preproc_bold.nii\n",
+ "sub-04/ses-02/func/sub-04_ses-02_task-rest_space-MNI152NLin2009cAsym_desc-preproc_bold.nii\n",
+ "sub-04/ses-01/func/sub-04_ses-01_task-rest_space-T1w_desc-preproc_bold.nii\n",
+ "sub-04/ses-01/func/sub-04_ses-01_task-rest_space-MNI152NLin2009cAsym_desc-preproc_bold.nii\n",
+ "sub-04/ses-02/func/sub-04_ses-02_task-rest_bold.nii\n",
+ "sub-04/ses-01/func/sub-04_ses-01_task-rest_bold.nii\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"File paths:\\n\", \"\\n\".join([str(f.relative_path) for f in files]), sep=\"\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Analyze the table\n",
"\n",
- "df.head(3)"
+ "Next we'll do some more detailed analysis of the table to demonstrate some of the more advanced manipulation that's possible."
]
},
{
@@ -1484,7 +2737,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
@@ -1525,13 +2778,13 @@
"dtype: int64"
]
},
- "execution_count": 11,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "ent_counts = entities.count(axis=0)\n",
+ "ent_counts = tab.ent.count(axis=0)\n",
"ent_counts"
]
},
@@ -1545,7 +2798,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 18,
"metadata": {},
"outputs": [
{
@@ -1559,7 +2812,7 @@
"dtype: int64"
]
},
- "execution_count": 12,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@@ -1580,7 +2833,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -2163,13 +3416,13 @@
"synthetic/derivatives/fmriprep 150 60"
]
},
- "execution_count": 13,
+ "execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df_drop.groupby(\"dataset\").agg(\n",
+ "tab.flat.groupby(\"dataset\").agg(\n",
" {\"file_path\": \"count\", \"json\": \"count\"}\n",
")"
]
@@ -2186,7 +3439,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
@@ -2233,22 +3486,22 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "176it [00:01, 89.47it/s, tot=176, good=176, rec=2245, err=0] \n",
- "203it [00:02, 90.18it/s, tot=203, good=203, rec=2630, err=0]]\n",
- "197it [00:02, 87.01it/s, tot=197, good=197, rec=2663, err=0]\n",
- "204it [00:02, 88.66it/s, tot=204, good=204, rec=2728, err=0] \n"
+ "172it [00:00, 327.64it/s, tot=172, good=172, rec=2240, err=0]\n",
+ "193it [00:00, 349.64it/s, tot=193, good=193, rec=2386, err=0]\n",
+ "213it [00:00, 333.32it/s, tot=213, good=213, rec=2812, err=0]\n",
+ "202it [00:00, 315.47it/s, tot=202, good=202, rec=2828, err=0]\n"
]
}
],
"source": [
- "! bids2table -x -w 4 ../bids-examples/"
+ "! bids2table -o bids-examples.b2t -x -w 4 ../bids-examples/"
]
},
{
@@ -2262,10 +3515,10 @@
"```bash\n",
"# Can't use --overwrite together with --worker_id\n",
"# Remove in advance\n",
- "rm -r ../bids-examples/index.b2t\n",
+ "rm -r bids-examples.b2t\n",
"\n",
"for worker_id in {0..3}; do\n",
- " bids2table --worker_id $worker_id --workers 4 ../bids-examples/ &\n",
+ " bids2table -o bids-examples.b2t --worker_id $worker_id --workers 4 ../bids-examples/ &\n",
"done\n",
"```"
]
@@ -2287,7 +3540,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.3"
+ "version": "3.8.17"
}
},
"nbformat": 4,
diff --git a/pyproject.toml b/pyproject.toml
index bbe40ae..5cf3eb6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,7 @@ authors = [
{name = "Connor Lane", email = "connor.lane858@gmail.com"},
]
readme = "README.md"
-requires-python = ">=3.7"
+requires-python = ">=3.8"
license = {text = "MIT License"}
classifiers = [
"Development Status :: 3 - Alpha",
diff --git a/tests/test_bids2table.py b/tests/test_bids2table.py
index 2f18536..8ef1607 100644
--- a/tests/test_bids2table.py
+++ b/tests/test_bids2table.py
@@ -28,23 +28,23 @@ def empty_dataset(tmp_path: Path) -> Path:
@pytest.mark.parametrize("persistent", [False, True])
def test_bids2table(tmp_path: Path, persistent: bool):
root = BIDS_EXAMPLES / "ds001"
- output = tmp_path / "index.b2t"
+ index_path = tmp_path / "index.b2t"
- df = bids2table(root=root, persistent=persistent, output=output)
- assert df.shape == (128, 40)
+ tab = bids2table(root=root, persistent=persistent, index_path=index_path)
+ assert tab.shape == (128, 40)
# Reload from cache
- df2 = bids2table(root=root, persistent=persistent, output=output)
- assert df.equals(df2)
+ tab2 = bids2table(root=root, persistent=persistent, index_path=index_path)
+ assert tab.equals(tab2)
def test_bids2table_empty(empty_dataset: Path):
- df = bids2table(root=empty_dataset, persistent=True)
- assert df.shape == (0, 0)
+ tab = bids2table(root=empty_dataset, persistent=True)
+ assert tab.shape == (0, 0)
# Reload from cache
- df2 = bids2table(root=empty_dataset)
- assert df.equals(df2)
+ tab2 = bids2table(root=empty_dataset)
+ assert tab.equals(tab2)
def test_bids2table_nonexist(tmp_path: Path):
diff --git a/tests/test_extractors/test_entities.py b/tests/test_entities.py
similarity index 97%
rename from tests/test_extractors/test_entities.py
rename to tests/test_entities.py
index 9105fa4..5bd94c0 100644
--- a/tests/test_extractors/test_entities.py
+++ b/tests/test_entities.py
@@ -6,7 +6,7 @@
import pytest
from pytest import FixtureRequest
-from bids2table.extractors.entities import BIDSEntities, parse_bids_entities
+from bids2table.entities import BIDSEntities, parse_bids_entities
EXAMPLES = (
(
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
deleted file mode 100644
index 8515650..0000000
--- a/tests/test_helpers.py
+++ /dev/null
@@ -1,116 +0,0 @@
-from typing import Any, Dict, Optional
-
-import pandas as pd
-import pytest
-
-from bids2table.helpers import (
- flat_to_multi_columns,
- join_bids_path,
- multi_to_flat_columns,
-)
-
-
-@pytest.mark.parametrize(
- "row,prefix,valid_only,expected",
- [
- (
- {"sub": "A01", "ses": "b", "run": 2, "suffix": "bold", "ext": ".json"},
- None,
- False,
- "sub-A01/ses-b/sub-A01_ses-b_run-2_bold.json",
- ),
- (
- {"sub": "A01", "ses": "b", "run": 2, "suffix": "bold", "ext": ".json"},
- "dataset",
- False,
- "dataset/sub-A01/ses-b/sub-A01_ses-b_run-2_bold.json",
- ),
- (
- {
- "sub": "A01",
- "ses": "b",
- "run": 2,
- "extraKey": 1,
- "suffix": "bold",
- "ext": ".json",
- },
- None,
- False,
- "sub-A01/ses-b/sub-A01_ses-b_run-2_extraKey-1_bold.json",
- ),
- (
- {
- "sub": "A01",
- "ses": "b",
- "run": 2,
- "extraKey": 1,
- "suffix": "bold",
- "ext": ".json",
- },
- None,
- True,
- "sub-A01/ses-b/sub-A01_ses-b_run-2_bold.json",
- ),
- (
- {
- "entities": {
- "sub": "A01",
- "ses": "b",
- "run": 2,
- "suffix": "bold",
- "ext": ".json",
- }
- },
- None,
- False,
- "sub-A01/ses-b/sub-A01_ses-b_run-2_bold.json",
- ),
- (
- pd.concat(
- [
- pd.Series(
- {
- "sub": "A01",
- "ses": "b",
- "run": 2,
- "suffix": "bold",
- "ext": ".json",
- }
- )
- ],
- keys=["entities"],
- ),
- None,
- False,
- "sub-A01/ses-b/sub-A01_ses-b_run-2_bold.json",
- ),
- ],
-)
-def test_join_bids_path(
- row: Dict[str, Any], prefix: Optional[str], valid_only: bool, expected: str
-):
- path = join_bids_path(row, prefix=prefix, valid_only=valid_only)
- assert str(path) == expected
-
-
-@pytest.mark.parametrize("sep", ["__", "."])
-def test_flat_to_multi_columns(sep: str):
- df = pd.DataFrame(
- {
- f"A{sep}a": [1, 2, 3],
- f"A{sep}b": ["a", "b", "c"],
- f"B{sep}a": [4, 5, 6],
- f"B{sep}b": ["d", "e", "f"],
- }
- )
- multi_index = pd.MultiIndex.from_product([["A", "B"], ["a", "b"]])
-
- df_multi = flat_to_multi_columns(df, sep=sep)
- assert df_multi.columns.equals(multi_index)
-
- df_flat = multi_to_flat_columns(df_multi, sep=sep)
- assert df_flat.equals(df)
-
-
-if __name__ == "__main__":
- pytest.main([__file__])
diff --git a/tests/test_table.py b/tests/test_table.py
new file mode 100644
index 0000000..df2d60d
--- /dev/null
+++ b/tests/test_table.py
@@ -0,0 +1,207 @@
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+
+import pandas as pd
+import pytest
+
+from bids2table import bids2table
+from bids2table.table import (
+ BIDSTable,
+ flat_to_multi_columns,
+ join_bids_path,
+ multi_to_flat_columns,
+)
+
+BIDS_EXAMPLES = Path(__file__).parent.parent / "bids-examples"
+
+
+@pytest.fixture(scope="module")
+def tab() -> BIDSTable:
+    """Build the table for the ``ds001`` example dataset once per test module."""
+    dataset_root = BIDS_EXAMPLES / "ds001"
+    return bids2table(dataset_root)
+
+
+def test_table(tab: BIDSTable):
+    """Check the overall shape, column groups, row slicing and summary properties."""
+    n_rows = 128
+    assert tab.shape == (n_rows, 40)
+
+    # Level-0 labels of the nested view's columns, in order of appearance
+    top_level = tab.nested.columns.unique(0).tolist()
+    assert top_level == ["ds", "ent", "meta", "finfo"]
+
+    # Every column-group view keeps all rows; only the column count differs
+    assert tab.ds.shape == (n_rows, 4)
+    assert tab.ent.shape == (n_rows, 32)
+    assert tab.meta.shape == (n_rows, 1)
+    assert tab.flat_meta.shape == (n_rows, 2)
+    assert tab.finfo.shape == (n_rows, 3)
+
+    # A row slice must still expose the column-group accessors
+    first_ten: BIDSTable = tab.iloc[:10]
+    assert first_ten.ds.shape == (10, 4)
+
+    assert len(tab.datatypes) == 2
+    assert len(tab.modalities) == 1
+    assert len(tab.subjects) == 16
+    assert len(tab.entities) == 3
+
+
+def test_table_files(tab: BIDSTable):
+    """``tab.files`` has 128 entries; the first exists on disk with empty metadata."""
+    all_files = tab.files
+    assert len(all_files) == 128
+
+    first = all_files[0]
+    assert first.path.exists()
+    # The root joined with the relative path must also point at an existing file
+    assert (first.root / first.relative_path).exists()
+    assert first.metadata == {}
+
+
+@pytest.mark.parametrize(
+    "key,filter_kwargs,expected_count",
+    [
+        # Exact match, by short entity name and by its long alias
+        ("sub", {"value": "04"}, 8),
+        ("subject", {"value": "04"}, 8),
+        # Exact match on a metadata field
+        ("RepetitionTime", {"value": 2.0}, 48),
+        # Membership, substring, regex and arbitrary-predicate filters
+        ("sub", {"items": ["04", "06"]}, 16),
+        ("sub", {"contains": "4"}, 16),
+        ("sub", {"regex": "0[456]"}, 24),
+        ("RepetitionTime", {"func": lambda v: v <= 2.0}, 48),
+    ],
+)
+def test_table_filter(
+    tab: BIDSTable, key: str, filter_kwargs: Dict[str, Any], expected_count: int
+):
+    """Filtering on a single key returns a BIDSTable with ``expected_count`` rows.
+
+    ``filter_kwargs`` holds the one keyword argument (``value``, ``items``,
+    ``contains``, ``regex`` or ``func``) forwarded to ``tab.filter``.
+    """
+    subtab = tab.filter(key, **filter_kwargs)
+    assert isinstance(subtab, BIDSTable)
+    assert len(subtab) == expected_count
+
+
+@pytest.mark.parametrize(
+    "filters,expected_count",
+    [
+        (
+            # Mixes a bare value with dict-style filter specifications
+            {
+                "dataset": "ds001",
+                "sub": {"items": ["04", "06"]},
+                "RepetitionTime": {"value": 2.0},
+            },
+            6,
+        )
+    ],
+)
+def test_table_filter_multi(
+    tab: BIDSTable, filters: Dict[str, Any], expected_count: int
+):
+    """Filters passed together to ``filter_multi`` yield ``expected_count`` rows."""
+    filtered = tab.filter_multi(**filters)
+    assert isinstance(filtered, BIDSTable)
+    assert len(filtered) == expected_count
+
+
+@pytest.mark.parametrize("inplace", [True, False])
+@pytest.mark.parametrize("by", ["sub", ["subject"], ["dataset", "sub"]])
+def test_table_sort_entities(tab: BIDSTable, by: Union[str, List[str]], inplace: bool):
+    """Sorting by entities keeps every row and yields subjects in sorted order."""
+    # Sort a copy, presumably so an in-place sort cannot touch the shared fixture
+    working = tab.copy()
+    result = working.sort_entities(by, inplace=inplace)
+
+    assert isinstance(result, BIDSTable)
+    assert len(result) == len(working)
+    assert result.subjects == sorted(working.subjects)
+
+
+@pytest.mark.parametrize("sep", ["__", "."])
+def test_flat_to_multi_columns(sep: str):
+    """Flat ``<group><sep><name>`` columns round-trip through a two-level index."""
+    flat_data = {
+        f"A{sep}a": [1, 2, 3],
+        f"A{sep}b": ["a", "b", "c"],
+        f"B{sep}a": [4, 5, 6],
+        f"B{sep}b": ["d", "e", "f"],
+    }
+    original = pd.DataFrame(flat_data)
+    expected_columns = pd.MultiIndex.from_product([["A", "B"], ["a", "b"]])
+
+    # Flat -> nested: column names are split on ``sep`` into (group, name) pairs
+    nested = flat_to_multi_columns(original, sep=sep)
+    assert nested.columns.equals(expected_columns)
+
+    # Nested -> flat: must reproduce the starting frame exactly
+    round_tripped = multi_to_flat_columns(nested, sep=sep)
+    assert round_tripped.equals(original)
+
+
+@pytest.mark.parametrize(
+    "entities,prefix,valid_only,expected",
+    [
+        # Plain dict of entities, no prefix
+        (
+            {"sub": "A01", "ses": "b", "run": 2, "suffix": "bold", "ext": ".json"},
+            None,
+            False,
+            "sub-A01/ses-b/sub-A01_ses-b_run-2_bold.json",
+        ),
+        # A prefix is prepended as the leading path component
+        (
+            {"sub": "A01", "ses": "b", "run": 2, "suffix": "bold", "ext": ".json"},
+            "dataset",
+            False,
+            "dataset/sub-A01/ses-b/sub-A01_ses-b_run-2_bold.json",
+        ),
+        # An unrecognized key is kept in the file name when valid_only is False
+        (
+            {
+                "sub": "A01",
+                "ses": "b",
+                "run": 2,
+                "extraKey": 1,
+                "suffix": "bold",
+                "ext": ".json",
+            },
+            None,
+            False,
+            "sub-A01/ses-b/sub-A01_ses-b_run-2_extraKey-1_bold.json",
+        ),
+        # ... and dropped from the file name when valid_only is True
+        (
+            {
+                "sub": "A01",
+                "ses": "b",
+                "run": 2,
+                "extraKey": 1,
+                "suffix": "bold",
+                "ext": ".json",
+            },
+            None,
+            True,
+            "sub-A01/ses-b/sub-A01_ses-b_run-2_bold.json",
+        ),
+        # A pandas Series is accepted in place of a dict
+        (
+            pd.Series(
+                {
+                    "sub": "A01",
+                    "ses": "b",
+                    "run": 2,
+                    "suffix": "bold",
+                    "ext": ".json",
+                }
+            ),
+            None,
+            False,
+            "sub-A01/ses-b/sub-A01_ses-b_run-2_bold.json",
+        ),
+        # Make sure it still works if applied to the raw df
+        (
+            {
+                "ds__dataset": "ds001",
+                "ent__sub": "A01",
+                "ent__ses": "b",
+                "ent__run": 2,
+                "suffix": "bold",
+                "ext": ".json",
+            },
+            None,
+            False,
+            "sub-A01/ses-b/sub-A01_ses-b_run-2_bold.json",
+        ),
+    ],
+)
+def test_join_bids_path(
+    entities: Union[Dict[str, Any], pd.Series],
+    prefix: Optional[str],
+    valid_only: bool,
+    expected: str,
+):
+    """``join_bids_path`` must turn ``entities`` into the ``expected`` path string."""
+    joined = join_bids_path(entities, prefix=prefix, valid_only=valid_only)
+    assert str(joined) == expected
+
+
+# Allow running this test module directly as a script instead of via the pytest CLI.
+if __name__ == "__main__":
+ pytest.main([__file__])