From 5119e5efc6ec5670c194376097f42c89be0b691c Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Mon, 27 May 2024 18:54:20 -0400 Subject: [PATCH 01/20] feat: Create validation context classes from schema.meta.context --- src/bids_validator/context.py | 36 +++++ src/bids_validator/context_generator.py | 175 ++++++++++++++++++++++++ 2 files changed, 211 insertions(+) create mode 100644 src/bids_validator/context.py create mode 100644 src/bids_validator/context_generator.py diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py new file mode 100644 index 0000000..e07ca07 --- /dev/null +++ b/src/bids_validator/context.py @@ -0,0 +1,36 @@ +from .context_generator import get_schema, load_schema_into_namespace + +schema = get_schema() +load_schema_into_namespace(schema['meta']['context']['context'], globals(), 'Context') + + +__all__ = [ # noqa: F822 + 'Context', + 'Schema', + 'Dataset', + 'Dataset_description', + 'Tree', + 'Subjects', + 'Subject', + 'Sessions', + 'Entities', + 'Sidecar', + 'Associations', + 'Events', + 'Aslcontext', + 'M0scan', + 'Magnitude', + 'Magnitude1', + 'Bval', + 'Bvec', + 'Channels', + 'Coordsystem', + 'Columns', + 'Json', + 'Gzip', + 'Nifti_header', + 'Dim_info', + 'Xyzt_units', + 'Ome', + 'Tiff', +] diff --git a/src/bids_validator/context_generator.py b/src/bids_validator/context_generator.py new file mode 100644 index 0000000..79c5c42 --- /dev/null +++ b/src/bids_validator/context_generator.py @@ -0,0 +1,175 @@ +from typing import Any, Dict, List + +import attrs +import bidsschematools as bst +import bidsschematools.schema +import httpx + +LATEST_SCHEMA_URL = 'https://bids-specification.readthedocs.io/en/latest/schema.json' +STABLE_SCHEMA_URL = 'https://bids-specification.readthedocs.io/en/stable/schema.json' + + +def get_schema(url: str | None = None) -> Dict[str, Any]: + """Load a BIDS schema from a URL or return the bundled schema if no URL is provided. + + Parameters + ---------- + url : str | None + The URL to load the schema from. If None, the bundled schema is returned. + The strings 'latest' and 'stable' are also accepted as shortcuts. + + Returns + ------- + Dict[str, Any] + The loaded schema as a dictionary. + + """ + if url is None: + return bst.schema.load_schema() + + if url == 'latest': + url = LATEST_SCHEMA_URL + elif url == 'stable': + url = STABLE_SCHEMA_URL + + with httpx.Client() as client: + return client.get(url).json() + + +def create_attrs_class( + class_name: str, description: str | None, properties: Dict[str, Any] +) -> type: + """Dynamically create an attrs class with the given properties. + + Parameters + ---------- + class_name : str + The name of the class to create. + description : str | None + A short description of the class, included in the docstring. + properties : Dict[str, Any] + A dictionary of property names and their corresponding schema information. + If a nested object is encountered, a nested class is created. + + Returns + ------- + cls : type + The dynamically created attrs class. + + """ + attributes = {} + for prop_name, prop_info in properties.items(): + prop_type = prop_info.get('type') + + if prop_type == 'object': + nested_class = create_attrs_class( + prop_name.capitalize(), + prop_info.get('description'), + prop_info.get('properties', {}), + ) + attributes[prop_name] = attrs.field(type=nested_class, default=None) + elif prop_type == 'array': + item_info = prop_info.get('items', {}) + item_type = item_info.get('type') + + if item_type == 'object': + nested_class = create_attrs_class( + prop_name.capitalize(), + item_info.get('description'), + item_info.get('properties', {}), + ) + attributes[prop_name] = attrs.field(type=List[nested_class], default=None) + else: + # Default to List[Any] for arrays of simple types + attributes[prop_name] = attrs.field(type=List[Any], default=None) + else: + # Default to Any for simple types + attributes[prop_name] = attrs.field(type=Any, default=None) + + return attrs.make_class( + class_name, + attributes, + class_body={ + '__doc__': f"""\ +{description} + +attrs data class auto-generated from BIDS schema + +Attributes +---------- +{"".join([f"{k}: {v.type.__name__}\n" for k, v in attributes.items()])} +""" + }, + ) + + +def generate_attrs_classes_from_schema( + schema: Dict[str, Any], + root_class_name: str, +) -> type: + """Generate attrs classes from a JSON schema. + + Parameters + ---------- + schema : Dict[str, Any] + The JSON schema to generate classes from. Must contain a 'properties' field. + root_class_name : str + The name of the root class to create. + + Returns + ------- + cls : type + The root class created from the schema. + + """ + if 'properties' not in schema: + raise ValueError("Invalid schema: 'properties' field is required") + + return create_attrs_class( + root_class_name, + schema.get('description'), + schema['properties'], + ) + + +def populate_namespace(attrs_class: type, namespace: Dict[str, Any]) -> None: + """Populate a namespace with nested attrs classes + + Parameters + ---------- + attrs_class : type + The root attrs class to add to the namespace. + namespace : Dict[str, Any] + The namespace to populate with nested classes. + + """ + for attr in attrs_class.__attrs_attrs__: + attr_type = attr.type + + if isinstance(attr_type, type) and hasattr(attr_type, '__attrs_attrs__'): + namespace[attr_type.__name__] = attr_type + populate_namespace(attr_type, namespace) + + +def load_schema_into_namespace( + schema: Dict[str, Any], + namespace: Dict[str, Any], + root_class_name: str, +) -> None: + """Load a JSON schema into a namespace as attrs classes. + + Intended to be used with globals() or locals() to create classes in the current module. + + Parameters + ---------- + schema : Dict[str, Any] + The JSON schema to load into the namespace. + namespace : Dict[str, Any] + The namespace to load the schema into. + root_class_name : str + The name of the root class to create. + + """ + attrs_class = generate_attrs_classes_from_schema(schema, root_class_name) + namespace[root_class_name] = attrs_class + populate_namespace(attrs_class, namespace) From 33507410e5dccd0d5c1a67ec15ca8dbb8773b34f Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Mon, 27 May 2024 19:43:05 -0400 Subject: [PATCH 02/20] chore: Add attrs and httpx to dependencies --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 6764b15..12e3635 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,8 @@ classifiers = [ requires-python = ">=3.8" dependencies = [ "bidsschematools >=0.11", + "attrs", + "httpx", ] [project.optional-dependencies] From 699c0ccacbeb823ea03116a6803a451649fa6e08 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Mon, 27 May 2024 20:25:56 -0400 Subject: [PATCH 03/20] fix: Nested f-strings are not permitted in older Python --- src/bids_validator/context_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bids_validator/context_generator.py b/src/bids_validator/context_generator.py index 79c5c42..a34a829 100644 --- a/src/bids_validator/context_generator.py +++ b/src/bids_validator/context_generator.py @@ -97,8 +97,8 @@ def create_attrs_class( Attributes ---------- -{"".join([f"{k}: {v.type.__name__}\n" for k, v in attributes.items()])} """ + + '\n'.join([f'{k}: {v.type.__name__}' for k, v in attributes.items()]), }, ) From a70e00b733594cf08cc119a05865f8476edf0cc7 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Mon, 27 May 2024 20:30:06 -0400 Subject: [PATCH 04/20] DOC: Add module docstrings --- src/bids_validator/context.py | 2 ++ src/bids_validator/context_generator.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py index e07ca07..38db217 100644 --- a/src/bids_validator/context.py +++ b/src/bids_validator/context.py @@ -1,3 +1,5 @@ +"""Validation context for schema-based BIDS validation.""" + from .context_generator import get_schema, load_schema_into_namespace schema = get_schema() diff --git a/src/bids_validator/context_generator.py b/src/bids_validator/context_generator.py index a34a829..f439f57 100644 --- a/src/bids_validator/context_generator.py +++ b/src/bids_validator/context_generator.py @@ -1,3 +1,18 @@ +"""Utilities for generating validation context classes from a BIDS schema. + +For default contexts based on the installed BIDS schema, use the `context` module. +These functions allow generating classes from alternative schemas. + +Basic usage: + +.. python:: + + from bids_validator.context_generator import get_schema, load_schema_into_namespace + + schema = get_schema('https://bids-specification.readthedocs.io/en/stable/schema.json') + load_schema_into_namespace(schema['meta']['context']['context'], globals(), 'Context') +""" + from typing import Any, Dict, List import attrs From a6e2e375fdf0768960b9f3057a28967c98d43d87 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Mon, 27 May 2024 20:34:42 -0400 Subject: [PATCH 05/20] doc: Update docstrings with pydocstyle fixes --- src/bids_validator/context_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bids_validator/context_generator.py b/src/bids_validator/context_generator.py index f439f57..94b8ace 100644 --- a/src/bids_validator/context_generator.py +++ b/src/bids_validator/context_generator.py @@ -148,7 +148,7 @@ def generate_attrs_classes_from_schema( def populate_namespace(attrs_class: type, namespace: Dict[str, Any]) -> None: - """Populate a namespace with nested attrs classes + """Populate a namespace with nested attrs classes. Parameters ---------- From 1d4638c6b2ed29d549dbb606ac6338889516e1a5 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Tue, 18 Jun 2024 18:04:10 -0400 Subject: [PATCH 06/20] PY39: Use explicit Union --- src/bids_validator/context_generator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bids_validator/context_generator.py b/src/bids_validator/context_generator.py index 94b8ace..b9ce262 100644 --- a/src/bids_validator/context_generator.py +++ b/src/bids_validator/context_generator.py @@ -13,7 +13,7 @@ load_schema_into_namespace(schema['meta']['context']['context'], globals(), 'Context') """ -from typing import Any, Dict, List +from typing import Any, Dict, List, Union import attrs import bidsschematools as bst @@ -24,7 +24,7 @@ STABLE_SCHEMA_URL = 'https://bids-specification.readthedocs.io/en/stable/schema.json' -def get_schema(url: str | None = None) -> Dict[str, Any]: +def get_schema(url: Union[str, None] = None) -> Dict[str, Any]: """Load a BIDS schema from a URL or return the bundled schema if no URL is provided. Parameters @@ -52,7 +52,7 @@ def get_schema(url: str | None = None) -> Dict[str, Any]: def create_attrs_class( - class_name: str, description: str | None, properties: Dict[str, Any] + class_name: str, description: Union[str, None], properties: Dict[str, Any] ) -> type: """Dynamically create an attrs class with the given properties. From 4058fce39b454ad95209c919b557f93c8d7b8233 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Tue, 18 Jun 2024 19:01:51 -0400 Subject: [PATCH 07/20] RF: Factor out typespec_to_type --- src/bids_validator/context.py | 8 +-- src/bids_validator/context_generator.py | 88 ++++++++++++++----------- 2 files changed, 54 insertions(+), 42 deletions(-) diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py index 38db217..29d0294 100644 --- a/src/bids_validator/context.py +++ b/src/bids_validator/context.py @@ -10,7 +10,7 @@ 'Context', 'Schema', 'Dataset', - 'Dataset_description', + 'DatasetDescription', 'Tree', 'Subjects', 'Subject', @@ -30,9 +30,9 @@ 'Columns', 'Json', 'Gzip', - 'Nifti_header', - 'Dim_info', - 'Xyzt_units', + 'NiftiHeader', + 'DimInfo', + 'XyztUnits', 'Ome', 'Tiff', ] diff --git a/src/bids_validator/context_generator.py b/src/bids_validator/context_generator.py index b9ce262..a802186 100644 --- a/src/bids_validator/context_generator.py +++ b/src/bids_validator/context_generator.py @@ -13,6 +13,7 @@ load_schema_into_namespace(schema['meta']['context']['context'], globals(), 'Context') """ +import json from typing import Any, Dict, List, Union import attrs @@ -51,20 +52,56 @@ def get_schema(url: Union[str, None] = None) -> Dict[str, Any]: return client.get(url).json() +def snake_to_pascal(val: str): + """Convert snake_case string to PascalCase.""" + return ''.join(sub.capitalize() for sub in val.split('_')) + + +def typespec_to_type(name: str, typespec: Dict[str, Any]): + """Convert JSON-schema style specification to type and metadata dictionary.""" + tp = typespec.get('type') + if not tp: + raise ValueError(f'Invalid typespec: {json.dumps(typespec)}') + metadata = {key: typespec[key] for key in ('name', 'description') if key in typespec} + if tp == 'object': + properties = typespec.get('properties') + if properties: + type_ = create_attrs_class(name, properties=properties, metadata=metadata) + else: + type_ = Dict[str, Any] + elif tp == 'array': + if 'items' in typespec: + subtype, md = typespec_to_type(name, typespec['items']) + else: + subtype = Any + type_ = List[subtype] + else: + type_ = { + 'number': float, + 'float': float, # Fix in schema + 'string': str, + 'integer': int, + 'int': int, # Fix in schema + }[tp] + return type_, metadata + + def create_attrs_class( - class_name: str, description: Union[str, None], properties: Dict[str, Any] + class_name: str, + properties: Dict[str, Any], + metadata: Dict[str, Any], ) -> type: """Dynamically create an attrs class with the given properties. Parameters ---------- - class_name : str + class_name The name of the class to create. - description : str | None - A short description of the class, included in the docstring. - properties : Dict[str, Any] + properties A dictionary of property names and their corresponding schema information. If a nested object is encountered, a nested class is created. + metadata + A short description of the class, included in the docstring. Returns ------- @@ -74,39 +111,17 @@ def create_attrs_class( """ attributes = {} for prop_name, prop_info in properties.items(): - prop_type = prop_info.get('type') - - if prop_type == 'object': - nested_class = create_attrs_class( - prop_name.capitalize(), - prop_info.get('description'), - prop_info.get('properties', {}), - ) - attributes[prop_name] = attrs.field(type=nested_class, default=None) - elif prop_type == 'array': - item_info = prop_info.get('items', {}) - item_type = item_info.get('type') - - if item_type == 'object': - nested_class = create_attrs_class( - prop_name.capitalize(), - item_info.get('description'), - item_info.get('properties', {}), - ) - attributes[prop_name] = attrs.field(type=List[nested_class], default=None) - else: - # Default to List[Any] for arrays of simple types - attributes[prop_name] = attrs.field(type=List[Any], default=None) - else: - # Default to Any for simple types - attributes[prop_name] = attrs.field(type=Any, default=None) + type_, md = typespec_to_type(prop_name, prop_info) + attributes[prop_name] = attrs.field( + type=type_, repr=prop_name != 'schema', default=None, metadata=md + ) return attrs.make_class( - class_name, + snake_to_pascal(class_name), attributes, class_body={ '__doc__': f"""\ -{description} +{metadata.get('description', '')} attrs data class auto-generated from BIDS schema @@ -140,11 +155,8 @@ def generate_attrs_classes_from_schema( if 'properties' not in schema: raise ValueError("Invalid schema: 'properties' field is required") - return create_attrs_class( - root_class_name, - schema.get('description'), - schema['properties'], - ) + type_, _ = typespec_to_type(root_class_name, schema) + return type_ def populate_namespace(attrs_class: type, namespace: Dict[str, Any]) -> None: From 07f0f04da802ecbea3985c47ef84b2cf45ecce53 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Wed, 19 Jun 2024 09:59:40 +0900 Subject: [PATCH 08/20] feat(types): Add FileTree type --- src/bids_validator/types/__init__.py | 1 + src/bids_validator/types/files.py | 106 +++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 src/bids_validator/types/__init__.py create mode 100644 src/bids_validator/types/files.py diff --git a/src/bids_validator/types/__init__.py b/src/bids_validator/types/__init__.py new file mode 100644 index 0000000..d5e90e9 --- /dev/null +++ b/src/bids_validator/types/__init__.py @@ -0,0 +1 @@ +"""Modules for providing types.""" diff --git a/src/bids_validator/types/files.py b/src/bids_validator/types/files.py new file mode 100644 index 0000000..29f0936 --- /dev/null +++ b/src/bids_validator/types/files.py @@ -0,0 +1,106 @@ +"""Types for working with file trees.""" + +import os +import posixpath +import stat +from typing import Dict, Self, Union + +import attrs + + +@attrs.define +class DummyDirentry: + """Partial reimplementation of :class:`os.DirEntry`. + + :class:`os.DirEntry` can't be instantiated, but this can. + """ + + path: str = attrs.field(repr=False, converter=os.fspath) + name: str = attrs.field(init=False) + _stat: os.stat_result = attrs.field(init=False, repr=False, default=None) + _lstat: os.stat_result = attrs.field(init=False, repr=False, default=None) + + def __attrs_post_init__(self) -> None: + self.name = os.path.basename(self.path) + + def __fspath__(self) -> str: + return self.path + + def stat(self, *, follow_symlinks: bool = True) -> os.stat_result: + """Return stat_result object for the entry; cached per entry.""" + if follow_symlinks: + if self._stat is None: + self._stat = os.stat(self.path, follow_symlinks=True) + return self._stat + else: + if self._lstat is None: + self._lstat = os.stat(self.path, follow_symlinks=False) + return self._lstat + + def is_dir(self, *, follow_symlinks: bool = True) -> bool: + """Return True if the entry is a directory; cached per entry.""" + _stat = self.stat(follow_symlinks=follow_symlinks) + return stat.S_ISDIR(_stat.st_mode) + + def is_file(self, *, follow_symlinks: bool = True) -> bool: + """Return True if the entry is a file; cached per entry.""" + _stat = self.stat(follow_symlinks=follow_symlinks) + return stat.S_ISREG(_stat.st_mode) + + def is_symlink(self) -> bool: + """Return True if the entry is a symlink; cached per entry.""" + _stat = self.stat(follow_symlinks=False) + return stat.S_ISLNK(_stat.st_mode) + + +def as_direntry(obj: os.PathLike) -> Union[os.DirEntry, DummyDirentry]: + """Convert PathLike into DirEntry-like object.""" + if isinstance(obj, os.DirEntry): + return obj + return DummyDirentry(obj) + + +@attrs.define +class FileTree: + """Represent a FileTree with cached metadata.""" + + direntry: Union[os.DirEntry, DummyDirentry] = attrs.field(repr=False, converter=as_direntry) + parent: Union['FileTree', None] = attrs.field(repr=False, default=None) + is_dir: bool = attrs.field(default=False) + children: Dict[str, 'FileTree'] = attrs.field(repr=False, factory=dict) + name: str = attrs.field(init=False) + + def __attrs_post_init__(self): + self.name = self.direntry.name + self.children = { + name: attrs.evolve(child, parent=self) for name, child in self.children.items() + } + + @classmethod + def read_from_filesystem( + cls, + direntry: os.PathLike, + parent: Union['FileTree', None] = None, + ) -> Self: + """Read a FileTree from the filesystem. + + Uses :func:`os.scandir` to walk the directory tree. + """ + self = cls(direntry, parent=parent) + if self.direntry.is_dir(): + self.is_dir = True + self.children = { + entry.name: FileTree.read_from_filesystem(entry, parent=self) + for entry in os.scandir(self.direntry) + } + return self + + def __contains__(self, relpath: os.PathLike): + parts = posixpath.split(relpath) + if len(parts) == 0: + return False + child = self.children.get(parts[0], False) + return child and posixpath.join(*parts[1:]) in child + + def __fspath__(self): + return self.direntry.path From eb5f50f1e7f8794f7304c7215daa90812dfd83c5 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Wed, 19 Jun 2024 13:52:26 +0900 Subject: [PATCH 09/20] feat(bidsignore): Add initial bidsignore implementation --- src/bids_validator/bidsignore.py | 84 ++++++++++++++++++++++++++++++++ tests/__init__.py | 1 + tests/test_bidsignore.py | 48 ++++++++++++++++++ 3 files changed, 133 insertions(+) create mode 100644 src/bids_validator/bidsignore.py create mode 100644 tests/__init__.py create mode 100644 tests/test_bidsignore.py diff --git a/src/bids_validator/bidsignore.py b/src/bids_validator/bidsignore.py new file mode 100644 index 0000000..29ca9ff --- /dev/null +++ b/src/bids_validator/bidsignore.py @@ -0,0 +1,84 @@ +"""Utilities for working with .bidsignore files.""" + +import os +import re +from functools import lru_cache +from typing import List, Union + +import attrs + +from .types.files import FileTree + + +def filter_file_tree(filetree: FileTree) -> FileTree: + """Stub.""" + return filetree + + +@lru_cache +def compile_pat(pattern: str) -> Union[re.Pattern, None]: + """Compile .gitignore-style ignore lines to regular expressions.""" + orig = pattern + # A line starting with # serves as a comment. + if pattern.startswith('#'): + return None + + # An optional prefix "!" which negates the pattern; + invert = pattern.startswith('!') + + # Put a backslash ("\") in front of the first hash for patterns that begin with a hash. + # Put a backslash ("\") in front of the first "!" for patterns that begin with a literal "!" + if pattern.startswith((r'\#', r'\!')): + pattern = pattern[1:] # Unescape + + # Trailing spaces are ignored unless they are quoted with backslash ("\"). + pattern = re.sub(r'(? Date: Wed, 19 Jun 2024 13:53:53 +0900 Subject: [PATCH 10/20] feat(test): Add fixture to return bids-examples directory --- tests/conftest.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..0cecb70 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,18 @@ +"""Pytest configuration.""" + +import importlib.resources +import os +from pathlib import Path + +import pytest + + +@pytest.fixture(scope='session') +def examples() -> Path: + """Get bids-examples from submodule, allow environment variable override.""" + ret = os.getenv('BIDS_EXAMPLES') + if not ret: + ret = importlib.resources.files(__package__) / 'data' / 'bids-examples' + if not ret.exists(): + pytest.skip('Missing examples') + return Path(ret) From 2b5a6e29f460c4b65c88c389a70a099b74d263a1 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Wed, 19 Jun 2024 14:13:24 +0900 Subject: [PATCH 11/20] fix: Clean up FileTree API --- src/bids_validator/types/files.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/bids_validator/types/files.py b/src/bids_validator/types/files.py index 29f0936..a1184eb 100644 --- a/src/bids_validator/types/files.py +++ b/src/bids_validator/types/files.py @@ -3,16 +3,19 @@ import os import posixpath import stat +from pathlib import Path from typing import Dict, Self, Union import attrs +__all__ = ('FileTree',) + @attrs.define -class DummyDirentry: +class UserDirEntry: """Partial reimplementation of :class:`os.DirEntry`. - :class:`os.DirEntry` can't be instantiated, but this can. + :class:`os.DirEntry` can't be instantiated from Python, but this can. """ path: str = attrs.field(repr=False, converter=os.fspath) @@ -53,18 +56,18 @@ def is_symlink(self) -> bool: return stat.S_ISLNK(_stat.st_mode) -def as_direntry(obj: os.PathLike) -> Union[os.DirEntry, DummyDirentry]: +def as_direntry(obj: os.PathLike) -> Union[os.DirEntry, UserDirEntry]: """Convert PathLike into DirEntry-like object.""" if isinstance(obj, os.DirEntry): return obj - return DummyDirentry(obj) + return UserDirEntry(obj) @attrs.define class FileTree: """Represent a FileTree with cached metadata.""" - direntry: Union[os.DirEntry, DummyDirentry] = attrs.field(repr=False, converter=as_direntry) + direntry: Union[os.DirEntry, UserDirEntry] = attrs.field(repr=False, converter=as_direntry) parent: Union['FileTree', None] = attrs.field(repr=False, default=None) is_dir: bool = attrs.field(default=False) children: Dict[str, 'FileTree'] = attrs.field(repr=False, factory=dict) @@ -95,12 +98,12 @@ def read_from_filesystem( } return self - def __contains__(self, relpath: os.PathLike): - parts = posixpath.split(relpath) + def __contains__(self, relpath: os.PathLike) -> bool: + parts = Path(relpath).parts if len(parts) == 0: return False child = self.children.get(parts[0], False) - return child and posixpath.join(*parts[1:]) in child + return child and (len(parts) == 1 or posixpath.join(*parts[1:]) in child) def __fspath__(self): return self.direntry.path From bcf89079260dbfe8be4175419f54264e26022b78 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Wed, 19 Jun 2024 14:14:23 +0900 Subject: [PATCH 12/20] feat(filetree): Add relative_path property that matches ignore expectations --- src/bids_validator/types/files.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/bids_validator/types/files.py b/src/bids_validator/types/files.py index a1184eb..a8a0d3a 100644 --- a/src/bids_validator/types/files.py +++ b/src/bids_validator/types/files.py @@ -3,6 +3,7 @@ import os import posixpath import stat +from functools import cached_property from pathlib import Path from typing import Dict, Self, Union @@ -107,3 +108,19 @@ def __contains__(self, relpath: os.PathLike) -> bool: def __fspath__(self): return self.direntry.path + + @cached_property + def relative_path(self) -> str: + """The path of the current FileTree, relative to the root. + + Follows parents up to the root and joins with POSIX separators (/). + + Directories include trailing slashes for simpler matching. + """ + if self.parent is None: + return '/' + + return posixpath.join( + self.parent.relative_path, + f'{self.name}/' if self.is_dir else self.name, + )[1:] From 8c5f46e9de24bfe25ab4dc13cc9850d3d5d14c0a Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Wed, 19 Jun 2024 14:15:30 +0900 Subject: [PATCH 13/20] feat(test): Validate Ignore class functionality --- tests/test_bidsignore.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tests/test_bidsignore.py b/tests/test_bidsignore.py index 55a955f..0ef68ba 100644 --- a/tests/test_bidsignore.py +++ b/tests/test_bidsignore.py @@ -1,7 +1,8 @@ """Test bids_validator.bidsignore.""" import pytest -from bids_validator.bidsignore import compile_pat +from bids_validator.bidsignore import Ignore, compile_pat +from bids_validator.types.files import FileTree @pytest.mark.parametrize( @@ -46,3 +47,19 @@ def test_skipped_patterns(): assert compile_pat('') is None assert compile_pat('# commented line') is None assert compile_pat(' ') is None + + +def test_Ignore_ds000117(examples): + """Test that we can load a .bidsignore file and match a file.""" + ds000117 = FileTree.read_from_filesystem(examples / 'ds000117') + ignore = Ignore.from_file(ds000117.children['.bidsignore']) + assert 'run-*_echo-*_FLASH.json' in ignore.patterns + assert 'sub-01/ses-mri/anat/sub-01_ses-mri_run-1_echo-1_FLASH.nii.gz' in ds000117 + assert ignore.match('sub-01/ses-mri/anat/sub-01_ses-mri_run-1_echo-1_FLASH.nii.gz') + flash_file = ( + ds000117.children['sub-01'] + .children['ses-mri'] + .children['anat'] + .children['sub-01_ses-mri_run-1_echo-1_FLASH.nii.gz'] + ) + assert ignore.match(flash_file.relative_path) From acc73a848c3f200847ff9d9f1c316b9f840c9a43 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Wed, 19 Jun 2024 16:07:54 +0900 Subject: [PATCH 14/20] feat(ignore): Add tree filtering function, record filtered files --- src/bids_validator/bidsignore.py | 46 +++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/src/bids_validator/bidsignore.py b/src/bids_validator/bidsignore.py index 29ca9ff..9a7cae0 100644 --- a/src/bids_validator/bidsignore.py +++ b/src/bids_validator/bidsignore.py @@ -3,18 +3,13 @@ import os import re from functools import lru_cache -from typing import List, Union +from typing import List, Self, Union import attrs from .types.files import FileTree -def filter_file_tree(filetree: FileTree) -> FileTree: - """Stub.""" - return filetree - - @lru_cache def compile_pat(pattern: str) -> Union[re.Pattern, None]: """Compile .gitignore-style ignore lines to regular expressions.""" @@ -69,9 +64,13 @@ def compile_pat(pattern: str) -> Union[re.Pattern, None]: @attrs.define class Ignore: - """Collection of .gitignore-style patterns.""" + """Collection of .gitignore-style patterns. + + Tracks successfully matched files for reporting. + """ patterns: List[str] = attrs.field(factory=list) + history: List[str] = attrs.field(factory=list, init=False) @classmethod def from_file(cls, pathlike: os.PathLike): @@ -79,6 +78,35 @@ def from_file(cls, pathlike: os.PathLike): with open(pathlike) as fobj: return cls([line.rstrip('\n') for line in fobj]) - def match(self, relpath: str): + def match(self, relpath: str) -> bool: """Match a relative path against a collection of ignore patterns.""" - return any(compile_pat(pattern).match(relpath) for pattern in self.patterns) + if any(compile_pat(pattern).match(relpath) for pattern in self.patterns): + self.history.append(relpath) + return True + return False + + def __add__(self, other) -> Self: + return self.__class__(patterns=self.patterns + other.patterns) + + +def filter_file_tree(filetree: FileTree) -> FileTree: + """Stub.""" + bidsignore = filetree.children.get('.bidsignore') + if not bidsignore: + return filetree + ignore = Ignore.from_file(bidsignore) + Ignore(['/.bidsignore']) + return _filter(filetree, ignore) + + +def _filter(filetree: FileTree, ignore: Ignore) -> FileTree: + items = filetree.children.items() + children = { + name: _filter(child, ignore) + for name, child in items + if not ignore.match(child.relative_path) + } + + if any(children.get(name) is not child for name, child in items): + filetree = attrs.evolve(filetree, children=children) + + return filetree From cd0ad803ed4cbdb2268858db72a0cf12e41a8e86 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Wed, 19 Jun 2024 16:37:13 +0900 Subject: [PATCH 15/20] refactor(ignore): Use an explicit chain of ignores so each Ignore can be inspected for history --- src/bids_validator/bidsignore.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/bids_validator/bidsignore.py b/src/bids_validator/bidsignore.py index 9a7cae0..9078902 100644 --- a/src/bids_validator/bidsignore.py +++ b/src/bids_validator/bidsignore.py @@ -3,7 +3,7 @@ import os import re from functools import lru_cache -from typing import List, Self, Union +from typing import List, Protocol, Union import attrs @@ -62,6 +62,10 @@ def compile_pat(pattern: str) -> Union[re.Pattern, None]: return re.compile(out_pattern) +class HasMatch(Protocol): # noqa: D101 + def match(self, relpath: str) -> bool: ... # noqa: D102 + + @attrs.define class Ignore: """Collection of .gitignore-style patterns. @@ -85,20 +89,32 @@ def match(self, relpath: str) -> bool: return True return False - def __add__(self, other) -> Self: - return self.__class__(patterns=self.patterns + other.patterns) + +@attrs.define +class IgnoreMany: + """Match against several ignore filters.""" + + ignores: List[Ignore] = attrs.field() + + def match(self, relpath: str) -> bool: + """Return true if any filters match the given file. + + Will short-circuit, so ordering is significant for side-effects, + such as recording files ignored by a particular filter. + """ + return any(ignore.match(relpath) for ignore in self.ignores) def filter_file_tree(filetree: FileTree) -> FileTree: - """Stub.""" + """Read .bidsignore and filter file tree.""" bidsignore = filetree.children.get('.bidsignore') if not bidsignore: return filetree - ignore = Ignore.from_file(bidsignore) + Ignore(['/.bidsignore']) + ignore = IgnoreMany([Ignore.from_file(bidsignore), Ignore(['/.bidsignore'])]) return _filter(filetree, ignore) -def _filter(filetree: FileTree, ignore: Ignore) -> FileTree: +def _filter(filetree: FileTree, ignore: HasMatch) -> FileTree: items = filetree.children.items() children = { name: _filter(child, ignore) @@ -106,6 +122,7 @@ def _filter(filetree: FileTree, ignore: Ignore) -> FileTree: if not ignore.match(child.relative_path) } + # XXX This check may not be worth the time. Profile this. if any(children.get(name) is not child for name, child in items): filetree = attrs.evolve(filetree, children=children) From 52738a059eb6679c1fc7c41d7e200aa405ef19bf Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Wed, 19 Jun 2024 20:48:47 +0900 Subject: [PATCH 16/20] fix(filetree): Relative path is always without a root slash --- src/bids_validator/types/files.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bids_validator/types/files.py b/src/bids_validator/types/files.py index a8a0d3a..f97ad27 100644 --- a/src/bids_validator/types/files.py +++ b/src/bids_validator/types/files.py @@ -118,9 +118,9 @@ def relative_path(self) -> str: Directories include trailing slashes for simpler matching. """ if self.parent is None: - return '/' + return '' return posixpath.join( self.parent.relative_path, f'{self.name}/' if self.is_dir else self.name, - )[1:] + ) From a631472affe33dac32c2a326c6fb8705bd5bbf36 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Wed, 19 Jun 2024 20:51:08 +0900 Subject: [PATCH 17/20] test(filetree): Initial tests --- tests/types/__init__.py | 1 + tests/types/test_files.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 tests/types/__init__.py create mode 100644 tests/types/test_files.py diff --git a/tests/types/__init__.py b/tests/types/__init__.py new file mode 100644 index 0000000..93961ce --- /dev/null +++ b/tests/types/__init__.py @@ -0,0 +1 @@ +"""Tests for bids_validator.types.""" diff --git a/tests/types/test_files.py b/tests/types/test_files.py new file mode 100644 index 0000000..df1d485 --- /dev/null +++ b/tests/types/test_files.py @@ -0,0 +1,18 @@ +"""Tests for bids_validator.types.files.""" + +import attrs +from bids_validator.types.files import FileTree + + +def test_FileTree(examples): + """Test the FileTree class.""" + ds000117 = FileTree.read_from_filesystem(examples / 'ds000117') + assert 'sub-01/ses-mri/anat/sub-01_ses-mri_acq-mprage_T1w.nii.gz' in ds000117 + assert ds000117.children['sub-01'].parent is ds000117 + + # Verify that evolving FileTrees creates consistent structures + evolved = attrs.evolve(ds000117) + assert evolved.children['sub-01'].parent is not ds000117 + assert evolved.children['sub-01'].parent is evolved + assert evolved.children['sub-01'].children['ses-mri'].parent is not ds000117.children['sub-01'] + assert evolved.children['sub-01'].children['ses-mri'].parent is evolved.children['sub-01'] From 44ef2be452e9382ba68a7a47b975f294bdbcb99e Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Thu, 11 Jul 2024 15:24:03 -0400 Subject: [PATCH 18/20] Update to schema 0.10.0+ --- src/bids_validator/context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py index 29d0294..0db70d3 100644 --- a/src/bids_validator/context.py +++ b/src/bids_validator/context.py @@ -3,7 +3,7 @@ from .context_generator import get_schema, load_schema_into_namespace schema = get_schema() -load_schema_into_namespace(schema['meta']['context']['context'], globals(), 'Context') +load_schema_into_namespace(schema['meta']['context'], globals(), 'Context') __all__ = [ # noqa: F822 From 684e7536cecb78dd1112224b6dd1074af1b82d4d Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Thu, 15 Aug 2024 10:59:33 -0400 Subject: [PATCH 19/20] fix: Import Self from typing_extensions --- pyproject.toml | 1 + src/bids_validator/types/files.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 12e3635..e4f0e04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ classifiers = [ requires-python = ">=3.8" dependencies = [ "bidsschematools >=0.11", + "typing_extensions", "attrs", "httpx", ] diff --git a/src/bids_validator/types/files.py b/src/bids_validator/types/files.py index f97ad27..ff13e04 100644 --- a/src/bids_validator/types/files.py +++ b/src/bids_validator/types/files.py @@ -5,9 +5,10 @@ import stat from functools import cached_property from pathlib import Path -from typing import Dict, Self, Union +from typing import Dict, Union import attrs +from typing_extensions import Self # PY310 __all__ = ('FileTree',) From 21cf04ce80054e39d212f88f1921963e6544a27a Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Sat, 31 Aug 2024 22:02:31 -0400 Subject: [PATCH 20/20] fix: Get type names in a py<310 compatible manner --- src/bids_validator/context_generator.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/bids_validator/context_generator.py b/src/bids_validator/context_generator.py index a802186..cbf6d61 100644 --- a/src/bids_validator/context_generator.py +++ b/src/bids_validator/context_generator.py @@ -86,6 +86,13 @@ def typespec_to_type(name: str, typespec: Dict[str, Any]): return type_, metadata +def _type_name(tp: type) -> str: + try: + return tp.__name__ + except AttributeError: + return str(tp) + + def create_attrs_class( class_name: str, properties: Dict[str, Any], @@ -128,7 +135,12 @@ def create_attrs_class( Attributes ---------- """ - + '\n'.join([f'{k}: {v.type.__name__}' for k, v in attributes.items()]), + + '\n'.join( + [ + f'{k}: {_type_name(v.type)}\n\t{v.metadata["description"]}' + for k, v in attributes.items() + ] + ), }, )