Skip to content

Commit

Permalink
Merge pull request #127 from fgcz/main
Browse files Browse the repository at this point in the history
app_runner 0.0.12, bfabricPY 1.13.16
  • Loading branch information
leoschwarz authored Jan 22, 2025
2 parents 3c34deb + 9304b24 commit 70f3212
Show file tree
Hide file tree
Showing 20 changed files with 233 additions and 26 deletions.
9 changes: 9 additions & 0 deletions app_runner/docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

## \[Unreleased\]

## \[0.0.12\] - 2025-01-22

## Added

- New input type `bfabric_order_fasta` which will place an order fasta file to the specified path, or create an empty
file if there is no order fasta available.
- `--filter` flag has been added to `inputs prepare` and `inputs clean` commands.
- The `app-runner app` commands now support passing a `AppVersion` yaml file instead of just a `AppSpec` yaml file.

## \[0.0.11\] - 2025-01-16

### Added
Expand Down
2 changes: 1 addition & 1 deletion app_runner/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "app_runner"
description = "Application runner for B-Fabric apps"
version = "0.0.11"
version = "0.0.12"
license = { text = "GPL-3.0" }
authors = [
{name = "Leonardo Schwarz", email = "[email protected]"},
Expand Down
28 changes: 21 additions & 7 deletions app_runner/src/app_runner/app_runner/resolve_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@

from typing import TYPE_CHECKING

from bfabric.experimental.workunit_definition import WorkunitDefinition
import yaml
from pydantic import ValidationError

from app_runner.specs.app.app_spec import AppSpec
from app_runner.specs.app.app_version import AppVersion
from bfabric.experimental.workunit_definition import WorkunitDefinition

if TYPE_CHECKING:
from pathlib import Path
from bfabric import Bfabric
from app_runner.specs.app.app_version import AppVersion


def resolve_app(versions: AppSpec, workunit_definition: WorkunitDefinition) -> AppVersion:
Expand All @@ -23,6 +25,16 @@ def resolve_app(versions: AppSpec, workunit_definition: WorkunitDefinition) -> A
return versions[app_version]


def _load_spec(spec_path: Path, app_id: int, app_name: str) -> AppVersion | AppSpec:
# TODO the reason this exists is I don't want to refactor and complicate the CLI right now, however,
# it is not fully clear if this is perfectly sound in all cases.
data = yaml.safe_load(spec_path.read_text())
try:
return AppVersion.model_validate(data)
except ValidationError:
return AppSpec.load_yaml(spec_path, app_id=app_id, app_name=app_name)


def load_workunit_information(
app_spec: Path, client: Bfabric, work_dir: Path, workunit_ref: int | Path
) -> tuple[AppVersion, Path]:
Expand All @@ -39,12 +51,14 @@ def load_workunit_information(
"""
workunit_definition_file = work_dir / "workunit_definition.yml"
workunit_definition = WorkunitDefinition.from_ref(workunit_ref, client, cache_file=workunit_definition_file)
app_versions = AppSpec.load_yaml(
app_spec,
app_id=workunit_definition.registration.application_id,
app_name=workunit_definition.registration.application_name,
app_parsed = _load_spec(
app_spec, workunit_definition.registration.application_id, workunit_definition.registration.application_name
)

if isinstance(workunit_ref, int):
workunit_ref = workunit_definition_file
app_version = resolve_app(versions=app_versions, workunit_definition=workunit_definition)
if isinstance(app_parsed, AppVersion):
app_version = app_parsed
else:
app_version = resolve_app(versions=app_parsed, workunit_definition=workunit_definition)
return app_version, workunit_ref
6 changes: 5 additions & 1 deletion app_runner/src/app_runner/app_runner/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ def run_dispatch(self, workunit_ref: int | Path, work_dir: Path) -> None:

def run_prepare_input(self, chunk_dir: Path) -> None:
prepare_folder(
inputs_yaml=chunk_dir / "inputs.yml", target_folder=chunk_dir, client=self._client, ssh_user=self._ssh_user
inputs_yaml=chunk_dir / "inputs.yml",
target_folder=chunk_dir,
client=self._client,
ssh_user=self._ssh_user,
filter=None,
)

def run_collect(self, workunit_ref: int | Path, chunk_dir: Path) -> None:
Expand Down
1 change: 0 additions & 1 deletion app_runner/src/app_runner/cli/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ def run(
# TODO doc
setup_script_logging()
client = Bfabric.from_config()

app_version, workunit_ref = load_workunit_information(app_spec, client, work_dir, workunit_ref)

# TODO(#107): usage of entity lookup cache was problematic -> beyond the full solution we could also consider
Expand Down
13 changes: 10 additions & 3 deletions app_runner/src/app_runner/cli/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@ def prepare(
target_folder: Path | None = None,
*,
ssh_user: str | None = None,
filter: str | None = None,
) -> None:
"""Prepare the input files by downloading them (if necessary).
"""Prepare the input files by downloading and generating them (if necessary).
:param inputs_yaml: Path to the inputs.yml file.
:param target_folder: Path to the target folder where the input files should be downloaded.
:param ssh_user: SSH user to use for downloading the input files, instead of the current user.
:param filter: only this input file will be prepared.
"""
setup_script_logging()
client = Bfabric.from_config()
Expand All @@ -39,18 +41,22 @@ def prepare(
ssh_user=ssh_user,
client=client,
action="prepare",
filter=filter,
)


@app_inputs.command()
def clean(
inputs_yaml: Path,
target_folder: Path | None = None,
*,
filter: str | None = None,
) -> None:
"""Removes all local copies of input files.
:param inputs_yaml: Path to the inputs.yml file.
:param target_folder: Path to the target folder where the input files should be removed.
:param filter: only this input file will be removed.
"""
setup_script_logging()
client = Bfabric.from_config()
Expand All @@ -59,8 +65,9 @@ def clean(
inputs_yaml=inputs_yaml,
target_folder=target_folder,
ssh_user=None,
action="clean",
client=client,
action="clean",
filter=filter,
)


Expand All @@ -72,7 +79,7 @@ def get_inputs_and_print(
"""Reads the input files, performing integrity checks if requested, and prints the results."""
client = Bfabric.from_config()
input_states = list_input_states(
specs=InputsSpec.read_yaml(inputs_yaml),
specs=InputsSpec.read_yaml_old(inputs_yaml),
target_folder=target_folder or Path(),
client=client,
check_files=check,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from pathlib import Path

import polars as pl
from bfabric import Bfabric
from bfabric.entities import Resource
from bfabric.utils.polars_utils import flatten_relations
import polars as pl

from app_runner.specs.inputs.bfabric_annotation_spec import BfabricAnnotationResourceSampleSpec, BfabricAnnotationSpec

Expand Down
3 changes: 2 additions & 1 deletion app_runner/src/app_runner/input_preparation/integrity.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from enum import Enum

from app_runner.specs.inputs.bfabric_order_fasta_spec import BfabricOrderFastaSpec
from app_runner.specs.inputs.file_scp_spec import FileScpSpec
from bfabric.entities import Resource, Dataset
from app_runner.specs.inputs.bfabric_dataset_spec import BfabricDatasetSpec # noqa: TC001
Expand Down Expand Up @@ -38,7 +39,7 @@ def check_integrity(spec: InputSpecType, local_path: Path, client: Bfabric) -> I
return _check_resource_spec(spec, local_path, client)
elif isinstance(spec, BfabricDatasetSpec):
return _check_dataset_spec(spec, local_path, client)
elif isinstance(spec, FileScpSpec) or spec.type == "bfabric_annotation":
elif isinstance(spec, FileScpSpec) or spec.type == "bfabric_annotation" or isinstance(spec, BfabricOrderFastaSpec):
return IntegrityState.NotChecked
else:
raise ValueError(f"Unsupported spec type: {type(spec)}")
Expand Down
48 changes: 40 additions & 8 deletions app_runner/src/app_runner/input_preparation/prepare.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Literal
from typing import TYPE_CHECKING, Literal, assert_never

from loguru import logger

from app_runner.input_preparation.collect_annotation import prepare_annotation
from app_runner.input_preparation.integrity import IntegrityState
from app_runner.input_preparation.list_inputs import list_input_states
from app_runner.specs.inputs.bfabric_dataset_spec import BfabricDatasetSpec
from app_runner.specs.inputs.bfabric_order_fasta_spec import BfabricOrderFastaSpec
from app_runner.specs.inputs.bfabric_resource_spec import BfabricResourceSpec
from app_runner.specs.inputs.file_scp_spec import FileScpSpec
from app_runner.specs.inputs_spec import (
InputSpecType,
InputsSpec,
)
from app_runner.specs.inputs.bfabric_dataset_spec import BfabricDatasetSpec
from app_runner.specs.inputs.file_scp_spec import FileScpSpec
from app_runner.specs.inputs.bfabric_resource_spec import BfabricResourceSpec
from app_runner.util.checksums import md5sum
from app_runner.util.scp import scp
from bfabric.entities import Resource, Dataset
from bfabric.entities import Resource, Dataset, Workunit, Order

if TYPE_CHECKING:
from pathlib import Path
Expand Down Expand Up @@ -45,6 +46,8 @@ def prepare_all(self, specs: list[InputSpecType]) -> None:
self.prepare_dataset(spec)
elif spec.type == "bfabric_annotation":
prepare_annotation(spec, client=self._client, working_dir=self._working_dir)
elif isinstance(spec, BfabricOrderFastaSpec):
self.prepare_order_fasta(spec)
else:
raise ValueError(f"Unsupported spec type: {type(spec)}")

Expand Down Expand Up @@ -93,16 +96,39 @@ def prepare_file_scp(self, spec: FileScpSpec) -> None:
def prepare_dataset(self, spec: BfabricDatasetSpec) -> None:
dataset = Dataset.find(id=spec.id, client=self._client)
# TODO use the new functionality Dataset.get_csv (or even go further in the refactoring)

target_path = self._working_dir / spec.filename
target_path.parent.mkdir(exist_ok=True, parents=True)
dataset.write_csv(path=target_path, separator=spec.separator)

def prepare_order_fasta(self, spec: BfabricOrderFastaSpec) -> None:
# Find the order.
match spec.entity:
case "workunit":
workunit = Workunit.find(id=spec.id, client=self._client)
if not isinstance(workunit.container, Order):
raise ValueError(f"Workunit {workunit.id} is not associated with an order")
order = workunit.container
case "order":
order = Order.find(id=spec.id, client=self._client)
case _:
assert_never(spec.entity)

# Write the result into the file
result_name = self._working_dir / spec.filename
result_name.parent.mkdir(exist_ok=True, parents=True)
fasta_content = order.data_dict.get("fastasequence", "")
if fasta_content and fasta_content[-1] != "\n":
fasta_content += "\n"
result_name.write_text(fasta_content)


def prepare_folder(
inputs_yaml: Path,
target_folder: Path | None,
client: Bfabric,
ssh_user: str | None,
filter: str | None,
action: Literal["prepare", "clean"] = "prepare",
) -> None:
"""Prepares the input files of a chunk folder according to the provided specs.
Expand All @@ -111,6 +137,7 @@ def prepare_folder(
:param target_folder: Path to the target folder where the input files should be downloaded.
:param client: Bfabric client to use for obtaining metadata about the input files.
:param ssh_user: SSH user to use for downloading the input files, should it be different from the current user.
:param filter: only this input file will be prepared.
:param action: Action to perform.
"""
# set defaults
Expand All @@ -119,13 +146,18 @@ def prepare_folder(
target_folder = inputs_yaml.parent

# parse the specs
specs_list = InputsSpec.read_yaml(inputs_yaml)
inputs_spec = InputsSpec.read_yaml(inputs_yaml)

if filter:
inputs_spec = inputs_spec.apply_filter(filter, client=client)
if not inputs_spec.inputs:
raise ValueError(f"Filter {filter} did not match any input files")

# prepare the folder
prepare = PrepareInputs(client=client, working_dir=target_folder, ssh_user=ssh_user)
if action == "prepare":
prepare.prepare_all(specs=specs_list)
prepare.prepare_all(specs=inputs_spec.inputs)
elif action == "clean":
prepare.clean_all(specs=specs_list)
prepare.clean_all(specs=inputs_spec.inputs)
else:
raise ValueError(f"Unknown action: {action}")
21 changes: 21 additions & 0 deletions app_runner/src/app_runner/specs/inputs/bfabric_order_fasta_spec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from __future__ import annotations
from typing import Literal, TYPE_CHECKING

from pydantic import BaseModel, ConfigDict

from app_runner.specs.common_types import RelativeFilePath # noqa: TC001

if TYPE_CHECKING:
from bfabric import Bfabric


class BfabricOrderFastaSpec(BaseModel):
model_config = ConfigDict(extra="forbid")
type: Literal["bfabric_order_fasta"] = "bfabric_order_fasta"

id: int
entity: Literal["workunit", "order"]
filename: RelativeFilePath

def resolve_filename(self, client: Bfabric) -> str:
return self.filename
18 changes: 16 additions & 2 deletions app_runner/src/app_runner/specs/inputs_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@

from app_runner.specs.inputs.bfabric_annotation_spec import BfabricAnnotationSpec
from app_runner.specs.inputs.bfabric_dataset_spec import BfabricDatasetSpec
from app_runner.specs.inputs.bfabric_order_fasta_spec import BfabricOrderFastaSpec
from app_runner.specs.inputs.bfabric_resource_spec import BfabricResourceSpec
from app_runner.specs.inputs.file_scp_spec import FileScpSpec

if TYPE_CHECKING:
from pathlib import Path
from bfabric import Bfabric

InputSpecType = Annotated[
BfabricResourceSpec | FileScpSpec | BfabricDatasetSpec | BfabricAnnotationSpec, Field(discriminator="type")
BfabricResourceSpec | FileScpSpec | BfabricDatasetSpec | BfabricOrderFastaSpec | BfabricAnnotationSpec,
Field(discriminator="type"),
]


Expand All @@ -23,11 +26,22 @@ class InputsSpec(BaseModel):
inputs: list[InputSpecType]

@classmethod
def read_yaml(cls, path: Path) -> list[InputSpecType]:
def read_yaml(cls, path: Path) -> InputsSpec:
return cls.model_validate(yaml.safe_load(path.read_text()))

@classmethod
def read_yaml_old(cls, path: Path) -> list[InputSpecType]:
model = cls.model_validate(yaml.safe_load(path.read_text()))
return model.inputs

@classmethod
def write_yaml(cls, specs: list[InputSpecType], path: Path) -> None:
model = cls.model_validate(dict(specs=specs))
path.write_text(yaml.dump(model.model_dump(mode="json")))

def apply_filter(self, filter: str, client: Bfabric) -> InputsSpec:
matches = []
for spec in self.inputs:
if spec.resolve_filename(client) == filter:
matches.append(spec)
return type(self)(inputs=matches)
5 changes: 5 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,14 @@ Versioning currently follows `X.Y.Z` where

## \[Unreleased\]

## \[1.13.16\] - 2025-01-22

### Added

- Add missing `Entity.__contains__` implementation to check if a key is present in an entity.
- `polars_utils.py` which contains functionality to normalize relational fields in tables
- Add `bfabric-cli executable inspect` command to inspect executables registered in B-Fabric.
- Add `bfabric-cli executable upload` command to upload executables to B-Fabric.

### Fixed

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "bfabric"
description = "Python client for the B-Fabric WSDL API"
version = "1.13.15"
version = "1.13.16"
license = { text = "GPL-3.0" }
authors = [
{ name = "Christian Panse", email = "[email protected]" },
Expand Down
4 changes: 4 additions & 0 deletions src/bfabric/entities/core/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ def find_by(cls, obj: dict[str, Any], client: Bfabric, max_results: int | None =
result = client.read(cls.ENDPOINT, obj=obj, max_results=max_results)
return {x["id"]: cls(x, client=client) for x in result}

def __contains__(self, key: str) -> Any:
"""Checks if a key is present in the data dictionary."""
return key in self.__data_dict

def __getitem__(self, key: str) -> Any:
"""Returns the value of a key in the data dictionary."""
return self.__data_dict[key]
Expand Down
Loading

0 comments on commit 70f3212

Please sign in to comment.