Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Parallel specification of next API #999

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added bids_ng/__init__.py
Empty file.
119 changes: 119 additions & 0 deletions bids_ng/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import os
import re
import typing as ty
from functools import cached_property
from pathlib import Path

import bidsschematools as bst # type: ignore[import]
import bidsschematools.schema # type: ignore[import]
import bidsschematools.types # type: ignore[import]

from . import types as bt


class BIDSValidationError(ValueError):
    """Raised for invalid files or values found in a BIDS dataset.

    Derives from :class:`ValueError`, so ``except ValueError`` handlers
    catch it as well.
    """


class Schema:
    """Thin wrapper around a loaded ``bidsschematools`` schema namespace.

    The ``objects``, ``rules`` and ``meta`` sections of the wrapped namespace
    are re-exported as properties so callers can avoid the
    ``schema.schema.<section>`` pattern.
    """

    schema: bst.types.Namespace

    def __init__(
        self,
        schema: ty.Optional[bst.types.Namespace] = None,
    ):
        # Without an explicit namespace, use the bundled schema, i.e. what
        # ``load_schema()`` returns when called with no arguments.
        self.schema = bst.schema.load_schema() if schema is None else schema

    @classmethod
    def from_spec(cls, schema_spec: str) -> "Schema":
        """Alternate constructor: load ``schema_spec`` with ``load_schema`` and wrap it."""
        loaded = bst.schema.load_schema(schema_spec)
        return cls(loaded)

    # Section accessors (shortcuts into the wrapped namespace)
    @property
    def objects(self) -> bst.types.Namespace:
        """The ``objects`` section of the wrapped schema."""
        namespace = self.schema
        return namespace.objects

    @property
    def rules(self) -> bst.types.Namespace:
        """The ``rules`` section of the wrapped schema."""
        namespace = self.schema
        return namespace.rules

    @property
    def meta(self) -> bst.types.Namespace:
        """The ``meta`` section of the wrapped schema."""
        namespace = self.schema
        return namespace.meta


# Module-level Schema built with no arguments (i.e. the bundled schema);
# BIDSFile falls back to it when it is constructed without a dataset.
default_schema = Schema()


class File(bt.File[Schema]):
    """Holder for an arbitrary file path, optionally attached to a dataset.

    :class:`BIDSFile` builds on this class; on its own it can also
    represent files that are not BIDS files.
    """

    def __init__(
        self,
        path: ty.Union[os.PathLike, str],
        dataset: ty.Optional["BIDSDataset"] = None,
    ):
        # Normalise first so the instance always exposes a ``Path`` object,
        # whatever path-like value the caller passed in.
        normalized = Path(path)
        self.path = normalized
        self.dataset = dataset


class BIDSFile(File, bt.BIDSFile[Schema]):
    """BIDS file

    On construction the file name is split into entities, suffix and
    extension, and the datatype is taken from the parent directory name when
    the schema lists that name as a datatype.  A name that does not match
    :attr:`pattern` leaves ``entities`` empty and ``suffix``/``extension``
    as ``None``.
    """

    # [<key>-<value>[_<key>-<value>...]_]<suffix><extension>
    pattern = re.compile(
        r"""
        (?:(?P<entities>(?:[a-z]+-[a-zA-Z0-9]+(?:_[a-z]+-[a-zA-Z0-9]+)*))_)?
        (?P<suffix>[a-zA-Z0-9]+)
        (?P<extension>\.[^/\\]+)$
        """,
        re.VERBOSE,
    )

    def __init__(
        self,
        path: ty.Union[os.PathLike, str],
        dataset: ty.Optional["BIDSDataset"] = None,
    ):
        """Parse ``path`` into its BIDS components.

        Parameters
        ----------
        path
            Location of the file.
        dataset
            Dataset the file belongs to.  Its ``schema`` drives the parsing;
            when omitted, the module-level ``default_schema`` is used.
        """
        super().__init__(path, dataset)
        self.entities = {}
        self.datatype = None
        self.suffix = None
        self.extension = None

        schema = default_schema if dataset is None else dataset.schema

        if self.path.parent.name in schema.objects.datatypes:
            self.datatype = self.path.parent.name

        matches = self.pattern.match(self.path.name)
        if matches is None:
            return

        entities, self.suffix, self.extension = matches.groups()

        if entities:
            # The pattern guarantees exactly one "-" per "_"-separated chunk.
            found_entities = dict(ent.split("-") for ent in entities.split("_"))
            # ``rules.entities`` is the ordered list of entity long names;
            # the per-entity definitions (short ``name``, ``format``) live in
            # ``objects.entities``, keyed by long name.
            entity_defs = schema.objects.entities
            for key in schema.rules.entities:
                entity = entity_defs[key]
                value = found_entities.get(entity.name)
                if value is None:
                    continue
                # Index-format entities keep their zero-padding via bt.Index.
                self.entities[key] = bt.Index(value) if entity.format == "index" else value

    @cached_property
    def metadata(self) -> dict[str, ty.Any]:
        """Sidecar metadata aggregated according to inheritance principle

        Raises
        ------
        ValueError
            If the file is not attached to a dataset.
        """
        if self.dataset is None:
            raise ValueError("No dataset to resolve sidecar metadata from")
        # TODO
        return {}


class BIDSDataset(bt.BIDSDataset[Schema]):
    """Dataset type specialised to :class:`Schema`; no members are defined here yet."""
18 changes: 18 additions & 0 deletions bids_ng/types/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from .api1 import BIDSDataset, BIDSFile, File, Index, Label
from .enums import Query
from .utils import PaddedInt

# Module-level shorthands for the three canonical (non-alias) Query members.
NONE = Query.NONE
REQUIRED = Query.REQUIRED
OPTIONAL = Query.OPTIONAL

__all__ = (
    "BIDSDataset",
    "BIDSFile",
    "File",
    "Index",
    "Label",
    "NONE",
    "OPTIONAL",
    "REQUIRED",
    "Query",
    "PaddedInt",
)
118 changes: 118 additions & 0 deletions bids_ng/types/api1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""PyBIDS 1.0 API specification"""

from pathlib import Path
from typing import Any, Dict, List, Optional, Protocol, TypeVar, Union

from .utils import PaddedInt

try:
from typing import TypeAlias
except ImportError:
from typing_extensions import TypeAlias


# Datasets should be parameterizable on some kind of schema object.
# External API users should not depend on it, so the type variable is left
# unconstrained (it has no bound, so any type is accepted), but once a Schema
# type is defined for an API implementation, type checkers should be able to
# introspect it.
SchemaT = TypeVar("SchemaT")


# Entity value types: an Index is a PaddedInt, a Label is a plain string.
Index: TypeAlias = PaddedInt
Label: TypeAlias = str


class File(Protocol[SchemaT]):
    """Protocol for a generic file holder.

    :class:`BIDSFile` extends this protocol; a plain ``File`` can also
    represent non-BIDS files.
    """

    # Location of the file.
    path: Path
    # Dataset the file belongs to, or None when it is not attached to one.
    dataset: Optional["BIDSDataset[SchemaT]"]

    def __fspath__(self) -> str:
        """Return the path as a string (the ``os.PathLike`` hook)."""
        location = self.path
        return str(location)

    @property
    def relative_path(self) -> Path:
        """Path of the file relative to the root of its dataset.

        Raises
        ------
        ValueError
            If the file is not attached to a dataset.
        """
        owner = self.dataset
        if owner is not None:
            return self.path.relative_to(owner.root)
        raise ValueError("No dataset root to construct relative path from")


class BIDSFile(File[SchemaT], Protocol):
    """Protocol for a BIDS file.

    Adds BIDS concepts on top of :class:`File`: the components of the path
    and the sidecar metadata.

    BIDS paths take the form::

        [sub-<label>/[ses-<label>/]<datatype>/]<entities>_<suffix><extension>
    """

    # Entity values found in the file name; each is a Label or an Index.
    entities: Dict[str, Union[Label, Index]]
    # Path components; each may be None.
    datatype: Optional[str]
    suffix: Optional[str]
    extension: Optional[str]

    @property
    def metadata(self) -> Dict[str, Any]:
        """Sidecar metadata aggregated according to inheritance principle"""
        ...


class BIDSDataset(Protocol[SchemaT]):
    """Interface to a single BIDS dataset.

    This structure does not consider the contents of sub-datasets
    such as `sourcedata/` or `derivatives/`.
    """

    root: Path
    schema: SchemaT

    dataset_description: Dict[str, Any]
    """Contents of dataset_description.json"""

    ignored: List[File[SchemaT]]
    """Invalid files found in dataset"""

    files: List[BIDSFile[SchemaT]]
    """Valid files found in dataset"""

    datatypes: List[str]
    """Datatype directories found in dataset"""

    modalities: List[str]
    """BIDS "modalities" found in dataset"""

    subjects: List[str]
    """Subject/participant identifiers found in the dataset"""

    entities: List[str]
    """Entities (long names) found in any filename in the dataset"""

    def get(self, **filters) -> List[BIDSFile[SchemaT]]:
        """Query dataset for files"""

    # ``Union[...]`` rather than ``Label | Index``: the annotation is evaluated
    # when the class body runs, and ``|`` between types needs Python 3.10+.
    def get_entities(self, entity: str, **filters) -> List[Union[Label, Index]]:
        """Query dataset for entity values"""

    def get_metadata(self, term: str, **filters) -> List[Any]:
        """Query dataset for metadata values"""


class DatasetCollection(BIDSDataset[SchemaT], Protocol):
    """Interface to a collection of BIDS datasets.

    A collection gives the user one view over several datasets, for example
    a dataset together with its source or derivative datasets.
    """

    # The main dataset of the collection.
    primary: BIDSDataset[SchemaT]
    # The datasets that make up the collection.
    datasets: List[BIDSDataset[SchemaT]]

    def add_dataset(self, dataset: BIDSDataset[SchemaT]) -> None:
        """Add ``dataset`` to this collection (protocol stub)."""
17 changes: 17 additions & 0 deletions bids_ng/types/enums.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from enum import Enum


class Query(Enum):
    """Sentinel values accepted as filter arguments in dataset queries

    * `Query.NONE` - The field MUST NOT be present
    * `Query.REQUIRED` - The field MUST be present, but may take any value
    * `Query.OPTIONAL` - The field MAY be present, and may take any value

    `Query.ANY` is a synonym for `Query.REQUIRED`. Its use is discouraged
    and may be removed in the future.
    """

    NONE = 1
    REQUIRED = 2
    # Same value as REQUIRED and defined after it, so Enum treats ANY as an
    # alias of REQUIRED rather than as a separate member.
    ANY = 2
    OPTIONAL = 3
76 changes: 76 additions & 0 deletions bids_ng/types/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import typing as ty


class PaddedInt(int):
    """Integer type that preserves zero-padding

    Acts like an int in almost all ways except that string formatting
    will keep the original zero-padding. Numeric format specifiers will
    work with the integer value.

    >>> PaddedInt(1)
    1
    >>> p2 = PaddedInt("02")
    >>> p2
    02
    >>> str(p2)
    '02'
    >>> p2 == 2
    True
    >>> p2 in range(3)
    True
    >>> f"{p2}"
    '02'
    >>> f"{p2:s}"
    '02'
    >>> f"{p2!s}"
    '02'
    >>> f"{p2!r}"
    '02'
    >>> f"{p2:d}"
    '2'
    >>> f"{p2:03d}"
    '002'
    >>> f"{p2:f}"
    '2.000000'
    >>> {2: "val"}.get(p2)
    'val'
    >>> {p2: "val"}.get(2)
    'val'

    Comparison with the exact padded string also succeeds, and ``!=``
    is always the negation of ``==``.

    >>> p2 == "02"
    True
    >>> p2 != "02"
    False
    >>> p2 != 3
    True

    Note that arithmetic will break the padding.

    >>> str(p2 + 1)
    '3'
    """

    def __init__(self, val: ty.Union[str, int]) -> None:
        """Record the original textual form of ``val``.

        The integer value itself is set by ``int.__new__``; this only stores
        the string used for display.

        Raises
        ------
        TypeError
            If ``str(val)`` is not made up solely of digits.
        """
        self.sval = str(val)
        if not self.sval.isdigit():
            raise TypeError(
                f"{self.__class__.__name__}() argument must be a string of digits "
                f"or int, not {val.__class__.__name__!r}"
            )

    def __eq__(self, val: object) -> bool:
        """Equal to the exact padded string, or to anything the int value equals."""
        return val == self.sval or super().__eq__(val)

    def __ne__(self, val: object) -> bool:
        """Negation of :meth:`__eq__`.

        ``int`` supplies its own ``__ne__``, which knows nothing about the
        padded string; without this override ``p == "02"`` and ``p != "02"``
        would both be true.
        """
        equal = self.__eq__(val)
        if equal is NotImplemented:
            # Let Python try the reflected operation / identity fallback.
            return equal
        return not equal

    def __str__(self) -> str:
        return self.sval

    def __repr__(self) -> str:
        return self.sval

    def __format__(self, format_spec: str) -> str:
        """Format a padded integer

        If a format spec can be used on a string, apply it to the zero-padded string.
        Otherwise format as an integer.
        """
        try:
            return format(self.sval, format_spec)
        except ValueError:
            return super().__format__(format_spec)

    def __hash__(self) -> int:
        # Defining __eq__ would otherwise set __hash__ to None; keep the int
        # hash so instances stay interchangeable with ints as dict keys.
        return super().__hash__()