Merge pull request #127 from fgcz/main

app_runner 0.0.12, bfabricPY 1.13.16
fgcz · Jan 22, 2025 · 70f3212 · 70f3212
2 parents 3c34deb + 9304b24
commit 70f3212
Show file tree

Hide file tree

Showing 20 changed files with 233 additions and 26 deletions.
diff --git a/app_runner/docs/changelog.md b/app_runner/docs/changelog.md
@@ -4,6 +4,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
 ## \[Unreleased\]
 
+## \[0.0.12\] - 2025-01-22
+
+## Added
+
+- New input type `bfabric_order_fasta` which will place an order fasta file to the specified path, or create an empty
+    file if there is no order fasta available.
+- `--filter` flag has been added to `inputs prepare` and `inputs clean` commands.
+- The `app-runner app` commands now support passing a `AppVersion` yaml file instead of just a `AppSpec` yaml file.
+
 ## \[0.0.11\] - 2025-01-16
 
 ### Added

diff --git a/app_runner/pyproject.toml b/app_runner/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "app_runner"
 description = "Application runner for B-Fabric apps"
-version = "0.0.11"
+version = "0.0.12"
 license = { text = "GPL-3.0" }
 authors = [
     {name = "Leonardo Schwarz", email = "[email protected]"},

diff --git a/app_runner/src/app_runner/app_runner/resolve_app.py b/app_runner/src/app_runner/app_runner/resolve_app.py
@@ -2,14 +2,16 @@
 
 from typing import TYPE_CHECKING
 
-from bfabric.experimental.workunit_definition import WorkunitDefinition
+import yaml
+from pydantic import ValidationError
 
 from app_runner.specs.app.app_spec import AppSpec
+from app_runner.specs.app.app_version import AppVersion
+from bfabric.experimental.workunit_definition import WorkunitDefinition
 
 if TYPE_CHECKING:
     from pathlib import Path
     from bfabric import Bfabric
-    from app_runner.specs.app.app_version import AppVersion
 
 
 def resolve_app(versions: AppSpec, workunit_definition: WorkunitDefinition) -> AppVersion:
@@ -23,6 +25,16 @@ def resolve_app(versions: AppSpec, workunit_definition: WorkunitDefinition) -> A
     return versions[app_version]
 
 
+def _load_spec(spec_path: Path, app_id: int, app_name: str) -> AppVersion | AppSpec:
+    # TODO the reason this exists is I don't want to refactor and complicate the CLI right now, however,
+    #      it is not fully clear if this is perfectly sound in all cases.
+    data = yaml.safe_load(spec_path.read_text())
+    try:
+        return AppVersion.model_validate(data)
+    except ValidationError:
+        return AppSpec.load_yaml(spec_path, app_id=app_id, app_name=app_name)
+
+
 def load_workunit_information(
     app_spec: Path, client: Bfabric, work_dir: Path, workunit_ref: int | Path
 ) -> tuple[AppVersion, Path]:
@@ -39,12 +51,14 @@ def load_workunit_information(
     """
     workunit_definition_file = work_dir / "workunit_definition.yml"
     workunit_definition = WorkunitDefinition.from_ref(workunit_ref, client, cache_file=workunit_definition_file)
-    app_versions = AppSpec.load_yaml(
-        app_spec,
-        app_id=workunit_definition.registration.application_id,
-        app_name=workunit_definition.registration.application_name,
+    app_parsed = _load_spec(
+        app_spec, workunit_definition.registration.application_id, workunit_definition.registration.application_name
     )
+
     if isinstance(workunit_ref, int):
         workunit_ref = workunit_definition_file
-    app_version = resolve_app(versions=app_versions, workunit_definition=workunit_definition)
+    if isinstance(app_parsed, AppVersion):
+        app_version = app_parsed
+    else:
+        app_version = resolve_app(versions=app_parsed, workunit_definition=workunit_definition)
     return app_version, workunit_ref
diff --git a/app_runner/src/app_runner/app_runner/runner.py b/app_runner/src/app_runner/app_runner/runner.py
@@ -31,7 +31,11 @@ def run_dispatch(self, workunit_ref: int | Path, work_dir: Path) -> None:
 
     def run_prepare_input(self, chunk_dir: Path) -> None:
         prepare_folder(
-            inputs_yaml=chunk_dir / "inputs.yml", target_folder=chunk_dir, client=self._client, ssh_user=self._ssh_user
+            inputs_yaml=chunk_dir / "inputs.yml",
+            target_folder=chunk_dir,
+            client=self._client,
+            ssh_user=self._ssh_user,
+            filter=None,
         )
 
     def run_collect(self, workunit_ref: int | Path, chunk_dir: Path) -> None:

diff --git a/app_runner/src/app_runner/cli/app.py b/app_runner/src/app_runner/cli/app.py
@@ -26,7 +26,6 @@ def run(
     # TODO doc
     setup_script_logging()
     client = Bfabric.from_config()
-
     app_version, workunit_ref = load_workunit_information(app_spec, client, work_dir, workunit_ref)
 
     # TODO(#107): usage of entity lookup cache was problematic -> beyond the full solution we could also consider

diff --git a/app_runner/src/app_runner/cli/inputs.py b/app_runner/src/app_runner/cli/inputs.py
@@ -24,12 +24,14 @@ def prepare(
     target_folder: Path | None = None,
     *,
     ssh_user: str | None = None,
+    filter: str | None = None,
 ) -> None:
-    """Prepare the input files by downloading them (if necessary).
+    """Prepare the input files by downloading and generating them (if necessary).
 
     :param inputs_yaml: Path to the inputs.yml file.
     :param target_folder: Path to the target folder where the input files should be downloaded.
     :param ssh_user: SSH user to use for downloading the input files, instead of the current user.
+    :param filter: only this input file will be prepared.
     """
     setup_script_logging()
     client = Bfabric.from_config()
@@ -39,18 +41,22 @@ def prepare(
         ssh_user=ssh_user,
         client=client,
         action="prepare",
+        filter=filter,
     )
 
 
 @app_inputs.command()
 def clean(
     inputs_yaml: Path,
     target_folder: Path | None = None,
+    *,
+    filter: str | None = None,
 ) -> None:
     """Removes all local copies of input files.
 
     :param inputs_yaml: Path to the inputs.yml file.
     :param target_folder: Path to the target folder where the input files should be removed.
+    :param filter: only this input file will be removed.
     """
     setup_script_logging()
     client = Bfabric.from_config()
@@ -59,8 +65,9 @@ def clean(
         inputs_yaml=inputs_yaml,
         target_folder=target_folder,
         ssh_user=None,
-        action="clean",
         client=client,
+        action="clean",
+        filter=filter,
     )
 
 
@@ -72,7 +79,7 @@ def get_inputs_and_print(
     """Reads the input files, performing integrity checks if requested, and prints the results."""
     client = Bfabric.from_config()
     input_states = list_input_states(
-        specs=InputsSpec.read_yaml(inputs_yaml),
+        specs=InputsSpec.read_yaml_old(inputs_yaml),
         target_folder=target_folder or Path(),
         client=client,
         check_files=check,

diff --git a/app_runner/src/app_runner/input_preparation/collect_annotation.py b/app_runner/src/app_runner/input_preparation/collect_annotation.py
@@ -1,8 +1,9 @@
 from pathlib import Path
+
+import polars as pl
 from bfabric import Bfabric
 from bfabric.entities import Resource
 from bfabric.utils.polars_utils import flatten_relations
-import polars as pl
 
 from app_runner.specs.inputs.bfabric_annotation_spec import BfabricAnnotationResourceSampleSpec, BfabricAnnotationSpec
 

diff --git a/app_runner/src/app_runner/input_preparation/integrity.py b/app_runner/src/app_runner/input_preparation/integrity.py
@@ -2,6 +2,7 @@
 
 from enum import Enum
 
+from app_runner.specs.inputs.bfabric_order_fasta_spec import BfabricOrderFastaSpec
 from app_runner.specs.inputs.file_scp_spec import FileScpSpec
 from bfabric.entities import Resource, Dataset
 from app_runner.specs.inputs.bfabric_dataset_spec import BfabricDatasetSpec  # noqa: TC001
@@ -38,7 +39,7 @@ def check_integrity(spec: InputSpecType, local_path: Path, client: Bfabric) -> I
         return _check_resource_spec(spec, local_path, client)
     elif isinstance(spec, BfabricDatasetSpec):
         return _check_dataset_spec(spec, local_path, client)
-    elif isinstance(spec, FileScpSpec) or spec.type == "bfabric_annotation":
+    elif isinstance(spec, FileScpSpec) or spec.type == "bfabric_annotation" or isinstance(spec, BfabricOrderFastaSpec):
         return IntegrityState.NotChecked
     else:
         raise ValueError(f"Unsupported spec type: {type(spec)}")

diff --git a/app_runner/src/app_runner/input_preparation/prepare.py b/app_runner/src/app_runner/input_preparation/prepare.py
@@ -1,22 +1,23 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Literal
+from typing import TYPE_CHECKING, Literal, assert_never
 
 from loguru import logger
 
 from app_runner.input_preparation.collect_annotation import prepare_annotation
 from app_runner.input_preparation.integrity import IntegrityState
 from app_runner.input_preparation.list_inputs import list_input_states
+from app_runner.specs.inputs.bfabric_dataset_spec import BfabricDatasetSpec
+from app_runner.specs.inputs.bfabric_order_fasta_spec import BfabricOrderFastaSpec
+from app_runner.specs.inputs.bfabric_resource_spec import BfabricResourceSpec
+from app_runner.specs.inputs.file_scp_spec import FileScpSpec
 from app_runner.specs.inputs_spec import (
     InputSpecType,
     InputsSpec,
 )
-from app_runner.specs.inputs.bfabric_dataset_spec import BfabricDatasetSpec
-from app_runner.specs.inputs.file_scp_spec import FileScpSpec
-from app_runner.specs.inputs.bfabric_resource_spec import BfabricResourceSpec
 from app_runner.util.checksums import md5sum
 from app_runner.util.scp import scp
-from bfabric.entities import Resource, Dataset
+from bfabric.entities import Resource, Dataset, Workunit, Order
 
 if TYPE_CHECKING:
     from pathlib import Path
@@ -45,6 +46,8 @@ def prepare_all(self, specs: list[InputSpecType]) -> None:
                 self.prepare_dataset(spec)
             elif spec.type == "bfabric_annotation":
                 prepare_annotation(spec, client=self._client, working_dir=self._working_dir)
+            elif isinstance(spec, BfabricOrderFastaSpec):
+                self.prepare_order_fasta(spec)
             else:
                 raise ValueError(f"Unsupported spec type: {type(spec)}")
 
@@ -93,16 +96,39 @@ def prepare_file_scp(self, spec: FileScpSpec) -> None:
     def prepare_dataset(self, spec: BfabricDatasetSpec) -> None:
         dataset = Dataset.find(id=spec.id, client=self._client)
         # TODO use the new functionality Dataset.get_csv (or even go further in the refactoring)
+
         target_path = self._working_dir / spec.filename
         target_path.parent.mkdir(exist_ok=True, parents=True)
         dataset.write_csv(path=target_path, separator=spec.separator)
 
+    def prepare_order_fasta(self, spec: BfabricOrderFastaSpec) -> None:
+        # Find the order.
+        match spec.entity:
+            case "workunit":
+                workunit = Workunit.find(id=spec.id, client=self._client)
+                if not isinstance(workunit.container, Order):
+                    raise ValueError(f"Workunit {workunit.id} is not associated with an order")
+                order = workunit.container
+            case "order":
+                order = Order.find(id=spec.id, client=self._client)
+            case _:
+                assert_never(spec.entity)
+
+        # Write the result into the file
+        result_name = self._working_dir / spec.filename
+        result_name.parent.mkdir(exist_ok=True, parents=True)
+        fasta_content = order.data_dict.get("fastasequence", "")
+        if fasta_content and fasta_content[-1] != "\n":
+            fasta_content += "\n"
+        result_name.write_text(fasta_content)
+
 
 def prepare_folder(
     inputs_yaml: Path,
     target_folder: Path | None,
     client: Bfabric,
     ssh_user: str | None,
+    filter: str | None,
     action: Literal["prepare", "clean"] = "prepare",
 ) -> None:
     """Prepares the input files of a chunk folder according to the provided specs.
@@ -111,6 +137,7 @@ def prepare_folder(
     :param target_folder: Path to the target folder where the input files should be downloaded.
     :param client: Bfabric client to use for obtaining metadata about the input files.
     :param ssh_user: SSH user to use for downloading the input files, should it be different from the current user.
+    :param filter: only this input file will be prepared.
     :param action: Action to perform.
     """
     # set defaults
@@ -119,13 +146,18 @@ def prepare_folder(
         target_folder = inputs_yaml.parent
 
     # parse the specs
-    specs_list = InputsSpec.read_yaml(inputs_yaml)
+    inputs_spec = InputsSpec.read_yaml(inputs_yaml)
+
+    if filter:
+        inputs_spec = inputs_spec.apply_filter(filter, client=client)
+        if not inputs_spec.inputs:
+            raise ValueError(f"Filter {filter} did not match any input files")
 
     # prepare the folder
     prepare = PrepareInputs(client=client, working_dir=target_folder, ssh_user=ssh_user)
     if action == "prepare":
-        prepare.prepare_all(specs=specs_list)
+        prepare.prepare_all(specs=inputs_spec.inputs)
     elif action == "clean":
-        prepare.clean_all(specs=specs_list)
+        prepare.clean_all(specs=inputs_spec.inputs)
     else:
         raise ValueError(f"Unknown action: {action}")
diff --git a/app_runner/src/app_runner/specs/inputs/bfabric_order_fasta_spec.py b/app_runner/src/app_runner/specs/inputs/bfabric_order_fasta_spec.py
@@ -0,0 +1,21 @@
+from __future__ import annotations
+from typing import Literal, TYPE_CHECKING
+
+from pydantic import BaseModel, ConfigDict
+
+from app_runner.specs.common_types import RelativeFilePath  # noqa: TC001
+
+if TYPE_CHECKING:
+    from bfabric import Bfabric
+
+
+class BfabricOrderFastaSpec(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+    type: Literal["bfabric_order_fasta"] = "bfabric_order_fasta"
+
+    id: int
+    entity: Literal["workunit", "order"]
+    filename: RelativeFilePath
+
+    def resolve_filename(self, client: Bfabric) -> str:
+        return self.filename
diff --git a/app_runner/src/app_runner/specs/inputs_spec.py b/app_runner/src/app_runner/specs/inputs_spec.py
@@ -7,14 +7,17 @@
 
 from app_runner.specs.inputs.bfabric_annotation_spec import BfabricAnnotationSpec
 from app_runner.specs.inputs.bfabric_dataset_spec import BfabricDatasetSpec
+from app_runner.specs.inputs.bfabric_order_fasta_spec import BfabricOrderFastaSpec
 from app_runner.specs.inputs.bfabric_resource_spec import BfabricResourceSpec
 from app_runner.specs.inputs.file_scp_spec import FileScpSpec
 
 if TYPE_CHECKING:
     from pathlib import Path
+    from bfabric import Bfabric
 
 InputSpecType = Annotated[
-    BfabricResourceSpec | FileScpSpec | BfabricDatasetSpec | BfabricAnnotationSpec, Field(discriminator="type")
+    BfabricResourceSpec | FileScpSpec | BfabricDatasetSpec | BfabricOrderFastaSpec | BfabricAnnotationSpec,
+    Field(discriminator="type"),
 ]
 
 
@@ -23,11 +26,22 @@ class InputsSpec(BaseModel):
     inputs: list[InputSpecType]
 
     @classmethod
-    def read_yaml(cls, path: Path) -> list[InputSpecType]:
+    def read_yaml(cls, path: Path) -> InputsSpec:
+        return cls.model_validate(yaml.safe_load(path.read_text()))
+
+    @classmethod
+    def read_yaml_old(cls, path: Path) -> list[InputSpecType]:
         model = cls.model_validate(yaml.safe_load(path.read_text()))
         return model.inputs
 
     @classmethod
     def write_yaml(cls, specs: list[InputSpecType], path: Path) -> None:
         model = cls.model_validate(dict(specs=specs))
         path.write_text(yaml.dump(model.model_dump(mode="json")))
+
+    def apply_filter(self, filter: str, client: Bfabric) -> InputsSpec:
+        matches = []
+        for spec in self.inputs:
+            if spec.resolve_filename(client) == filter:
+                matches.append(spec)
+        return type(self)(inputs=matches)
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -10,9 +10,14 @@ Versioning currently follows `X.Y.Z` where
 
 ## \[Unreleased\]
 
+## \[1.13.16\] - 2025-01-22
+
 ### Added
 
+- Add missing `Entity.__contains__` implementation to check if a key is present in an entity.
 - `polars_utils.py` which contains functionality to normalize relational fields in tables
+- Add `bfabric-cli executable inspect` command to inspect executables registered in B-Fabric.
+- Add `bfabric-cli executable upload` command to upload executables to B-Fabric.
 
 ### Fixed
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "bfabric"
 description = "Python client for the B-Fabric WSDL API"
-version = "1.13.15"
+version = "1.13.16"
 license = { text = "GPL-3.0" }
 authors = [
     { name = "Christian Panse", email = "[email protected]" },

diff --git a/src/bfabric/entities/core/entity.py b/src/bfabric/entities/core/entity.py
@@ -80,6 +80,10 @@ def find_by(cls, obj: dict[str, Any], client: Bfabric, max_results: int | None =
         result = client.read(cls.ENDPOINT, obj=obj, max_results=max_results)
         return {x["id"]: cls(x, client=client) for x in result}
 
+    def __contains__(self, key: str) -> Any:
+        """Checks if a key is present in the data dictionary."""
+        return key in self.__data_dict
+
     def __getitem__(self, key: str) -> Any:
         """Returns the value of a key in the data dictionary."""
         return self.__data_dict[key]