Skip to content

Commit

Permalink
Merge pull request #296 from kaitj/pybids_db
Browse files Browse the repository at this point in the history
Update pybids_db parameters
  • Loading branch information
kaitj authored Jun 7, 2023
2 parents cda292e + fe425de commit ec0af2e
Show file tree
Hide file tree
Showing 11 changed files with 95 additions and 68 deletions.
4 changes: 2 additions & 2 deletions docs/bids_app/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,11 @@ pybids_inputs:
- run
```

### `pybids_db_dir`
### `pybidsdb_dir`

PyBIDS allows a cached layout to be used in order to reduce the time required to index a BIDS dataset. This variable is the path (if provided) at which the *pybids* [layout](#bids.layout.BIDSLayout) is saved. If `None` or `''` is provided, the layout is not saved or used. The path provided must be absolute, otherwise the database will not be used.

### `pybids_db_reset`
### `pybidsdb_reset`

A boolean determining whether the existing layout should be updated. Default behaviour does not update the existing database if one is used.

Expand Down
4 changes: 2 additions & 2 deletions docs/bids_app/workflow.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ configfile: 'config/snakebids.yml'
inputs = snakebids.generate_inputs(
bids_dir=config["bids_dir"],
pybids_inputs=config["pybids_inputs"],
pybids_database_dir=config.get("pybids_db_dir"),
pybids_reset_database=config.get("pybids_db_reset"),
pybidsdb_dir=config.get("pybidsdb_dir"),
pybidsdb_reset=config.get("pybidsdb_reset"),
derivatives=config.get("derivatives"),
participant_label=config.get("participant_label"),
exclude_participant_label=config.get("exclude_participant_label"),
Expand Down
8 changes: 5 additions & 3 deletions docs/running_snakebids/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ Note that if any rules in the Snakebids workflow use Singularity containers, spe
1. Inputs are copied into a working subdirectory of the output directory before any processing that requires a Singularity container is performed, or:
2. The `SINGULARITY_BINDPATH` environment variable binds the location of the input dataset.

Indexing of large datasets can be a time-consuming process. Snakebids, through `PyBIDS` has the ability to create or leverage an existing database, requiring indexing of datasets to be only performed when user chooses to do so (usually if the dataset has changed)! Note, this feature is **opt-in**, meaning it is not used unless the associated config variables are used. To opt-in:
Indexing of large datasets can be a time-consuming process. Leveraging the functionality of `PyBIDS`, Snakebids offers a convenient solution by allowing you to create or utilize an existing database. With this approach, the indexing of datasets is only performed when explicitly requested, typically when there are changes to the dataset. To create or use an existing database, you can invoke the following CLI arguments:

1. Uncomment the lines in `snakebids.yml` containing `pybids_db_dir` and `pybids_db_reset`.
1. The variables can be updated directly in this file or through the CLI by using `--pybidsdb-dir {dir}` to specify the database path and `--reset-db` to indicate that the database should be updated. _Note: CLI arguments take precedence if both CLI and config variables are set._
1. `--pybidsdb-dir {dir}`: specify the path to the database directory
1. `--pybidsdb-reset`: indicate that an existing database should be updated

It's important to note that this indexing feature is **disabled by default**, meaning Snakebids does not create or expect to find a database unless it has been explicitly set using the associated CLI arguments.

Workflow mode
=============
Expand Down
5 changes: 2 additions & 3 deletions snakebids/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,8 @@ def run_snakemake(self) -> None:
self.config["snakefile"] = self.snakefile_path

# Update config with pybids settings
if args.pybidsdb_dir:
self.config["pybids_db_dir"] = args.pybidsdb_dir
self.config["pybids_db_reset"] = args.reset_db
self.config["pybidsdb_dir"] = args.pybidsdb_dir
self.config["pybidsdb_reset"] = args.pybidsdb_reset

update_config(self.config, args)

Expand Down
21 changes: 17 additions & 4 deletions snakebids/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class SnakebidsArgs:
snakemake_args: list[str]
args_dict: dict[str, Any]
pybidsdb_dir: Optional[Path] = None
reset_db: bool = False
pybidsdb_reset: bool = False


def create_parser(include_snakemake: bool = False) -> argparse.ArgumentParser:
Expand Down Expand Up @@ -138,15 +138,23 @@ def create_parser(include_snakemake: bool = False) -> argparse.ArgumentParser:
help=(
"Optional path to directory of SQLite databasefile for PyBIDS. "
"If directory is passed and folder exists, indexing is skipped. "
"If reset_db is called, indexing will persist"
"If pybidsdb_reset is called, indexing will persist"
),
)

standard_group.add_argument(
"--pybidsdb-reset",
"--pybidsdb_reset",
action="store_true",
help=("Reindex existing PyBIDS SQLite database"),
)

# To be deprecated
standard_group.add_argument(
"--reset-db",
"--reset_db",
action="store_true",
help=("Reindex existing PyBIDS SQLite database"),
help=argparse.SUPPRESS,
)

standard_group.add_argument(
Expand Down Expand Up @@ -272,6 +280,11 @@ def parse_snakebids_args(parser: argparse.ArgumentParser) -> SnakebidsArgs:
)
if all_args[0].force_conversion:
logger.warning("--force-conversion is deprecated and no longer has any effect.")
if all_args[0].reset_db:
logger.warning(
"--reset-db/--reset_db will be deprecated in a future release. To reset "
"the pybids database, use the new --pybidsdb-reset flag instead."
)
return SnakebidsArgs(
snakemake_args=all_args[1],
# resolve all path items to get absolute paths
Expand All @@ -283,7 +296,7 @@ def parse_snakebids_args(parser: argparse.ArgumentParser) -> SnakebidsArgs:
if all_args[0].pybidsdb_dir is None
else Path(all_args[0].pybidsdb_dir).resolve()
),
reset_db=all_args[0].reset_db,
pybidsdb_reset=all_args[0].pybidsdb_reset or all_args[0].reset_db,
)


Expand Down
59 changes: 39 additions & 20 deletions snakebids/core/input_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,16 @@
def generate_inputs( # noqa: PLR0913
bids_dir: Path | str,
pybids_inputs: InputsConfig,
pybids_database_dir: Path | str | None = ...,
pybids_reset_database: bool = ...,
pybidsdb_dir: Path | str | None = ...,
pybidsdb_reset: bool = ...,
derivatives: bool | Path | str = ...,
pybids_config: str | None = ...,
limit_to: Iterable[str] | None = ...,
participant_label: Iterable[str] | str | None = ...,
exclude_participant_label: Iterable[str] | str | None = ...,
use_bids_inputs: Literal[True] | None = ...,
pybids_database_dir: Path | str | None = ...,
pybids_reset_database: bool = ...,
) -> BidsDataset:
...

Expand All @@ -49,29 +51,33 @@ def generate_inputs( # noqa: PLR0913
def generate_inputs( # noqa: PLR0913
bids_dir: Path | str,
pybids_inputs: InputsConfig,
pybids_database_dir: Path | str | None = ...,
pybids_reset_database: bool = ...,
pybidsdb_dir: Path | str | None = ...,
pybidsdb_reset: bool = ...,
derivatives: bool | Path | str = ...,
pybids_config: str | None = ...,
limit_to: Iterable[str] | None = ...,
participant_label: Iterable[str] | str | None = ...,
exclude_participant_label: Iterable[str] | str | None = ...,
use_bids_inputs: Literal[False] = ...,
pybids_database_dir: Path | str | None = ...,
pybids_reset_database: bool = ...,
) -> BidsDatasetDict:
...


def generate_inputs( # noqa: PLR0913
bids_dir: Path | str,
pybids_inputs: InputsConfig,
pybids_database_dir: Path | str | None = None,
pybids_reset_database: bool = False,
pybidsdb_dir: Path | str | None = None,
pybidsdb_reset: bool = False,
derivatives: bool | Path | str = False,
pybids_config: str | None = None,
limit_to: Iterable[str] | None = None,
participant_label: Iterable[str] | str | None = None,
exclude_participant_label: Iterable[str] | str | None = None,
use_bids_inputs: bool | None = None,
pybids_database_dir: Path | str | None = None,
pybids_reset_database: bool = False,
) -> BidsDataset | BidsDatasetDict:
"""Dynamically generate snakemake inputs using pybids_inputs
Expand Down Expand Up @@ -105,11 +111,11 @@ def generate_inputs( # noqa: PLR0913
as in ``/path/to/sub-{subject}/{wildcard_1}-{wildcard_2}``. This path will be
parsed without pybids, allowing the use of non-bids-compliant paths.
pybids_database_dir
pybidsdb_dir
Path to database directory. If None is provided, database
is not used
pybids_reset_database
pybidsdb_reset
A boolean that determines whether to reset / overwrite
existing database.
Expand Down Expand Up @@ -239,14 +245,27 @@ def generate_inputs( # noqa: PLR0913
participant_label, exclude_participant_label
)

if pybids_database_dir:
_logger.warning(
"The parameter `pybids_database_dir` in generate_inputs() is deprecated "
"and will be removed in the next release. To set the pybids database, use "
"the `pybidsdb_dir` parameter instead."
)
if pybids_reset_database:
_logger.warning(
"The parameter `pybids_reset_database` in generate_inputs() is deprecated "
"and will be removed in the next release. To reset the pybids database, "
"use the `pybidsdb_reset` parameter instead."
)

# Generates a BIDSLayout
layout = (
_gen_bids_layout(
bids_dir=bids_dir,
derivatives=derivatives,
pybids_config=pybids_config,
pybids_database_dir=pybids_database_dir,
pybids_reset_database=pybids_reset_database,
pybidsdb_dir=pybidsdb_dir or pybids_database_dir,
pybidsdb_reset=pybidsdb_reset or pybids_reset_database,
)
if not _all_custom_paths(pybids_inputs)
else None
Expand Down Expand Up @@ -290,8 +309,8 @@ def _all_custom_paths(config: InputsConfig):
def _gen_bids_layout(
bids_dir: Path | str,
derivatives: Path | str | bool,
pybids_database_dir: Path | str | None,
pybids_reset_database: bool,
pybidsdb_dir: Path | str | None,
pybidsdb_reset: bool,
pybids_config: Path | str | None = None,
) -> BIDSLayout:
"""Create (or reindex) the BIDSLayout if one doesn't exist,
Expand All @@ -307,11 +326,11 @@ def _gen_bids_layout(
determines whether snakebids will search in the
derivatives subdirectory of the input dataset.
pybids_database_dir
pybidsdb_dir
Path to database directory. If None is provided, database
is not used
pybids_reset_database
pybidsdb_reset
A boolean that determines whether to reset / overwrite
existing database.
Expand All @@ -323,20 +342,20 @@ def _gen_bids_layout(

# Check for database_dir
# If blank, assume db not to be used
if not pybids_database_dir:
pybids_database_dir = None
if not pybidsdb_dir:
pybidsdb_dir = None
# Otherwise check for relative path and update
elif not Path(pybids_database_dir).is_absolute():
pybids_database_dir = None
elif not Path(pybidsdb_dir).is_absolute():
pybidsdb_dir = None
_logger.warning("Absolute path must be provided, database will not be used")

return BIDSLayout(
str(bids_dir),
derivatives=derivatives,
validate=False,
config=pybids_config,
database_path=pybids_database_dir,
reset_database=pybids_reset_database,
database_path=pybidsdb_dir,
reset_database=pybidsdb_reset,
indexer=BIDSLayoutIndexer(validate=False, index_metadata=False),
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,6 @@ pybids_inputs:
- task
- run

#this configures the options to save the BIDSLayout
# by default, database is not saved (uncomment to save)
# NOTE: pybids_db_dir must be an absolute path
# pybids_db_dir: '/path/to/db_dir' # Leave blank if you do not wish to use this
# pybids_db_reset: False # Change this to true to update the database

#configuration for the command-line parameters to make available
# passed on the argparse add_argument()
parse_args:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ configfile: workflow.source_path('../config/snakebids.yml')
inputs = generate_inputs(
bids_dir=config["bids_dir"],
pybids_inputs=config["pybids_inputs"],
pybids_database_dir=config.get("pybids_db_dir"),
pybids_reset_database=config.get("pybids_db_reset"),
pybidsdb_dir=config.get("pybidsdb_dir"),
pybidsdb_reset=config.get("pybidsdb_reset"),
derivatives=config.get("derivatives", None),
participant_label=config.get("participant_label", None),
exclude_participant_label=config.get("exclude_participant_label", None),
Expand Down
4 changes: 2 additions & 2 deletions snakebids/tests/mock/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ pybids_inputs:
- task
- run

pybids_db_dir: '/path/to/db_dir'
pybids_db_reset: False
pybidsdb_dir: '/path/to/db_dir'
pybidsdb_reset: False

targets_by_analysis_level:
participant:
Expand Down
8 changes: 4 additions & 4 deletions snakebids/tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def app(mocker: MockerFixture):
)
app.config["analysis_level"] = "participant"
app.config["snakemake_args"] = []
app.config["pybids_db_reset"] = False
app.config["pybidsdb_reset"] = False
mocker.patch.object(sn_app, "update_config", return_value=app.config)
return app

Expand Down Expand Up @@ -97,8 +97,8 @@ def test_runs_in_correct_mode(
{
"root": "",
"snakemake_dir": Path("app").resolve(),
"pybids_db_dir": Path("/tmp/output/.db"),
"pybids_db_reset": True,
"pybidsdb_dir": Path("/tmp/output/.db"),
"pybidsdb_reset": True,
"snakefile": Path("Snakefile"),
"output_dir": outputdir.resolve(),
}
Expand All @@ -115,7 +115,7 @@ def test_runs_in_correct_mode(
# patched
args_dict={"output_dir": outputdir.resolve()},
pybidsdb_dir=Path("/tmp/output/.db"),
reset_db=True,
pybidsdb_reset=True,
)

try:
Expand Down
Loading

0 comments on commit ec0af2e

Please sign in to comment.