
score_pdb_structures #176

Open · wants to merge 1 commit into base: main
29 changes: 29 additions & 0 deletions utils/score-pdb-structures-plugin/.bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
[bumpversion]
current_version = 0.1.0
commit = False
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
serialize =
{major}.{minor}.{patch}-{release}{dev}
{major}.{minor}.{patch}

[bumpversion:part:release]
optional_value = _
first_value = dev
values =
dev
_

[bumpversion:part:dev]

[bumpversion:file:pyproject.toml]
search = version = "{current_version}"
replace = version = "{new_version}"

[bumpversion:file:VERSION]

[bumpversion:file:README.md]

[bumpversion:file:plugin.json]

[bumpversion:file:src/polus/mm/utils/score_pdb_structures/__init__.py]
4 changes: 4 additions & 0 deletions utils/score-pdb-structures-plugin/.dockerignore
@@ -0,0 +1,4 @@
.venv
out
tests
__pycache__
1 change: 1 addition & 0 deletions utils/score-pdb-structures-plugin/.gitignore
@@ -0,0 +1 @@
poetry.lock
5 changes: 5 additions & 0 deletions utils/score-pdb-structures-plugin/CHANGELOG.md
@@ -0,0 +1,5 @@
# CHANGELOG

## 0.1.0

Initial release.
22 changes: 22 additions & 0 deletions utils/score-pdb-structures-plugin/Dockerfile
@@ -0,0 +1,22 @@
FROM condaforge/mambaforge

ENV EXEC_DIR="/opt/executables"
ENV POLUS_LOG="INFO"
RUN mkdir -p ${EXEC_DIR}


# Work directory defined in the base container
# WORKDIR ${EXEC_DIR}

COPY pyproject.toml ${EXEC_DIR}
COPY VERSION ${EXEC_DIR}
COPY README.md ${EXEC_DIR}
COPY CHANGELOG.md ${EXEC_DIR}

# Install needed packages here

COPY src ${EXEC_DIR}/src

RUN pip3 install ${EXEC_DIR} --no-cache-dir

CMD ["--help"]
18 changes: 18 additions & 0 deletions utils/score-pdb-structures-plugin/README.md
@@ -0,0 +1,18 @@
# score_pdb_structures (0.1.0)

Fetches the PDB information from RCSB and scores PDB structures.

## Options

This plugin takes 6 input arguments and 2 output arguments:

| Name | Description | I/O | Type | Default |
|---------------|-------------------------|--------|--------|---------|
| input_pdbids | List of input PDBIDs to score, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] |
| min_row | The row min index, Type: int | Input | int | int |
| max_row | The row max index, Type: int | Input | int | int |
| output_txt_path | Path to the text dataset file, Type: string, File type: output, Accepted formats: txt | Input | string | string |
| timeout_duration | The maximum time in seconds to wait for a response from the API before timing out, Type: int | Input | int | int |
| max_retries | The maximum number of times to retry the request in case of failure, Type: int | Input | int | int |
| output_txt_path | Path to the txt file | Output | File | File |
| output_pdbids | The selected PDB IDs | Output | {'type': 'array', 'items': 'string'} | {'type': 'array', 'items': 'string'} |
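The score behind `output_pdbids` is a fixed weighted sum over resolution, R-factors, citation count, and experimental method (see `score_pdb_entry` in `score_pdb_structures.py` later in this diff). A self-contained sketch of that formula; the sample entry values are illustrative, not real RCSB data:

```python
# Weights copied from score_pdb_entry; the entry below is illustrative.
WEIGHTS = {
    "Resolution": 0.35,
    "R-Free": 0.25,
    "R-Work": 0.20,
    "Citations": 0.10,
    "Experimental Method": 0.10,
}


def score(entry: dict) -> float:
    # Lower resolution / R-factors are better, so invert them.
    resolution = 1 / entry["Resolution"] if entry["Resolution"] else 0
    r_free = 1 / entry["R-Free"] if entry["R-Free"] != "N/A" else 0
    r_work = 1 / entry["R-Work"] if entry["R-Work"] != "N/A" else 0
    method = 1 if entry["Experimental Method"].lower() == "x-ray diffraction" else 0
    return (
        WEIGHTS["Resolution"] * resolution
        + WEIGHTS["R-Free"] * r_free
        + WEIGHTS["R-Work"] * r_work
        + WEIGHTS["Citations"] * entry["Citations"]
        + WEIGHTS["Experimental Method"] * method
    )


entry = {
    "Resolution": 2.0,
    "R-Free": 0.25,
    "R-Work": 0.20,
    "Experimental Method": "X-RAY DIFFRACTION",
    "Citations": 3,
}
print(score(entry))  # 0.175 + 1.0 + 1.0 + 0.3 + 0.1 = 2.575
```

Note that citation count enters unnormalized, so heavily cited structures can dominate the other terms.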
1 change: 1 addition & 0 deletions utils/score-pdb-structures-plugin/VERSION
@@ -0,0 +1 @@
0.1.0
4 changes: 4 additions & 0 deletions utils/score-pdb-structures-plugin/build-docker.sh
@@ -0,0 +1,4 @@
#!/bin/bash

version=$(<VERSION)
docker build . -t polusai/score-pdb-structures-tool:${version}
89 changes: 89 additions & 0 deletions utils/score-pdb-structures-plugin/ict.yml
@@ -0,0 +1,89 @@
specVersion: "0.1.0"
name: score_pdb_structures
version: 0.1.0
container: score-pdb-structures-plugin
entrypoint:
title: score_pdb_structures
description: Fetches the PDB information from RCSB and scores PDB structures.
author: Brandon Walker, Nazanin Donyapour
contact: [email protected], [email protected]
repository:
documentation:
citation:

inputs:
- name: input_pdbids
required: true
description: List of input PDBIDs to score, Type string[], File type input, Accepted formats list[string]
type: ['null', {'type': 'array', 'items': 'string'}]
format:
uri: edam:format_2330
- name: min_row
required: true
description: The row min index, Type int
type: int
format:
uri: edam:format_2330
- name: max_row
required: true
description: The row max index, Type int
type: int
format:
uri: edam:format_2330
- name: output_txt_path
required: true
description: Path to the text dataset file, Type string, File type output, Accepted formats txt
type: string
defaultValue: system.log
format:
uri: edam:format_2330
- name: timeout_duration
required: true
description: The maximum time in seconds to wait for a response from the API before timing out, Type int
type: int
defaultValue: 10
format:
uri: edam:format_2330
- name: max_retries
required: true
description: The maximum number of times to retry the request in case of failure, Type int
type: int
defaultValue: 5
format:
uri: edam:format_2330
outputs:
- name: output_txt_path
required: true
description: Path to the txt file
type: File
format:
uri: edam:format_2330
- name: output_pdbids
required: true
description: The selected PDB IDs
type: {'type': 'array', 'items': 'string'}
ui:
- key: inputs.input_pdbids
title: "input_pdbids: "
description: "List of input PDBIDs to score, Type string[], File type input, Accepted formats list[string]"
type: ['null', {'type': 'array', 'items': 'string'}]
- key: inputs.min_row
title: "min_row: "
description: "The row min index, Type int"
type: int
- key: inputs.max_row
title: "max_row: "
description: "The row max index, Type int"
type: int
- key: inputs.output_txt_path
title: "output_txt_path: "
description: "Path to the text dataset file, Type string, File type output, Accepted formats txt"
type: string
- key: inputs.timeout_duration
title: "timeout_duration: "
description: "The maximum time in seconds to wait for a response from the API before timing out, Type int"
type: int
- key: inputs.max_retries
title: "max_retries: "
description: "The maximum number of times to retry the request in case of failure, Type int"
type: int
31 changes: 31 additions & 0 deletions utils/score-pdb-structures-plugin/pyproject.toml
@@ -0,0 +1,31 @@
[tool.poetry]
name = "polus-mm-utils-score-pdb-structures"
version = "0.1.0"
description = "Fetches the PDB information from RCSB and scores PDB structures."
authors = ["Nazanin Donyapour <[email protected]>", "Brandon Walker <[email protected]>"]
readme = "README.md"
packages = [{include = "polus", from = "src"}]

[tool.poetry.dependencies]
python = ">=3.9,<3.13"
typer = "^0.7.0"
sophios = "0.1.4"
pandas = "2.2.2"

[tool.poetry.group.dev.dependencies]
bump2version = "^1.0.1"
pytest = "^7.4"
pytest-sugar = "^0.9.6"
pre-commit = "^3.2.1"
black = "^23.3.0"
mypy = "^1.1.1"
ruff = "^0.0.270"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
pythonpath = [
"."
]
@@ -0,0 +1,135 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0

class: CommandLineTool

label: Fetches the PDB information from RCSB and scores PDB structures.

doc: |-
Fetches the PDB information from RCSB and scores PDB structures.

baseCommand: ["python", "-m", "polus.mm.utils.score_pdb_structures"]

hints:
DockerRequirement:
dockerPull: polusai/score-pdb-structures-tool@sha256:eb16431eefabef6daacdd9554e6cdb77e2c9396d6be5ac310ef3f7918c15a9a2

requirements:
InlineJavascriptRequirement: {}


inputs:
input_pdbids:
label: List of input PDBIDs to score
doc: |-
List of input PDBIDs to score
Type: string[]
File type: input
Accepted formats: list[string]
type: ["null", {"type": "array", "items": "string"}]
format: edam:format_2330
inputBinding:
prefix: --input_pdbids
default: []

min_row:
label: The row min index
doc: |-
The row min index
Type: int
type: int?
format:
- edam:format_2330
inputBinding:
prefix: --min_row
default: 1

max_row:
label: The row max index
doc: |-
The row max index
Type: int
type: int?
format:
- edam:format_2330
inputBinding:
prefix: --max_row
default: -1

output_txt_path:
label: Path to the text dataset file
doc: |-
Path to the text dataset file
Type: string
File type: output
Accepted formats: txt
type: string
format:
- edam:format_2330
inputBinding:
prefix: --output_txt_path
default: system.log

timeout_duration:
label: The maximum time in seconds to wait for a response from the API before timing out
doc: |-
The maximum time in seconds to wait for a response from the API before timing out
Type: int
type: int?
format:
- edam:format_2330
inputBinding:
prefix: --timeout_duration
default: 10

max_retries:
label: The maximum number of times to retry the request in case of failure
doc: |-
The maximum number of times to retry the request in case of failure
Type: int
type: int?
format:
- edam:format_2330
inputBinding:
prefix: --max_retries
default: 5

outputs:
output_txt_path:
label: Path to the txt file
doc: |-
Path to the txt file
type: File
outputBinding:
glob: $(inputs.output_txt_path)
format: edam:format_2330

output_pdbids:
label: The selected PDB IDs
doc: |-
The selected PDB IDs
type:
type: array
items: string
outputBinding:
glob: $(inputs.output_txt_path)
loadContents: true
outputEval: |
${
// check if self[0] exists
if (!self[0]) {
return null;
}
var lines = self[0].contents.split("\n");
// remove blank lines
lines = lines.filter(function(line) {return line.trim() !== '';});
// The output file has one line
// The format of the line is as follows: 6x7z,4yo7,5fyr,6ktl,4rxm,4irx,3v16,6b5z,4mio,7d5n
return lines[0].split(",").map(function(item) {return item.trim();});
}

$namespaces:
edam: https://edamontology.org/

$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
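The `outputEval` above recovers `output_pdbids` by reading back the single comma-separated line the tool writes to `output_txt_path`. An equivalent parse in Python; the sample line is illustrative:

```python
def parse_pdbids(contents: str) -> list[str]:
    # Keep non-blank lines only, mirroring the CWL outputEval filter.
    lines = [line for line in contents.split("\n") if line.strip()]
    if not lines:
        return []
    # The output file holds one comma-separated line of PDB IDs.
    return [item.strip() for item in lines[0].split(",")]


print(parse_pdbids("6x7z,4yo7, 5fyr\n"))  # ['6x7z', '4yo7', '5fyr']
```

Unlike this sketch, the CWL expression returns `null` (rather than an empty list) when the glob matches nothing.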
@@ -0,0 +1,7 @@
"""score_pdb_structures."""

__version__ = "0.1.0"

from polus.mm.utils.score_pdb_structures.score_pdb_structures import ( # noqa # pylint: disable=unused-import
score_pdb_structures,
)
@@ -0,0 +1,81 @@
"""Package entrypoint for the score_pdb_structures package."""

# Base packages
import argparse
import logging
from os import environ

from polus.mm.utils.score_pdb_structures.score_pdb_structures import (
score_pdb_structures,
)

logging.basicConfig(
format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s",
datefmt="%d-%b-%y %H:%M:%S",
)
POLUS_LOG = getattr(logging, environ.get("POLUS_LOG", "INFO"))
logger = logging.getLogger("polus.mm.utils.score_pdb_structures")
logger.setLevel(POLUS_LOG)


def main(args: argparse.Namespace) -> None:
"""score_pdb_structures."""
logger.info(f"input_pdbids: {args.input_pdbids}")
logger.info(f"output_txt_path: {args.output_txt_path}")
logger.info(f"min_row: {args.min_row}")
logger.info(f"max_row: {args.max_row}")
logger.info(f"timeout_duration: {args.timeout_duration}")
logger.info(f"max_retries: {args.max_retries}")

score_pdb_structures(
input_pdbids=args.input_pdbids,
output_txt_path=args.output_txt_path,
min_row=args.min_row,
max_row=args.max_row,
timeout_duration=args.timeout_duration,
max_retries=args.max_retries,
)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="score_pdb_structures.")
parser.add_argument(
"--input_pdbids",
type=str,
nargs="+",
required=True,
help="List of input PDBIDs to score, Type string[]",
)
parser.add_argument(
"--output_txt_path",
type=str,
required=True,
help="Path to the text dataset file, Type string",
)
parser.add_argument(
"--min_row",
type=int,
required=True,
help="The row min index, Type int",
)
parser.add_argument(
"--max_row",
type=int,
required=True,
help="The row max index, Type int",
)
parser.add_argument(
"--timeout_duration",
type=int,
required=True,
help="The maximum time to wait for a response from the API before timing out",
)
parser.add_argument(
"--max_retries",
type=int,
required=True,
help="The maximum number of times to retry the request in case of failure",
)

args = parser.parse_args()
main(args)
@@ -0,0 +1,212 @@
"""Fetches the PDB information from RCSB and scores PDB structures."""
import time
from pathlib import Path
from typing import Any
from typing import Optional

import pandas as pd
import requests

SUCCESS = 200
TOO_MANY_REQUESTS = 429


def fetch_pdb_data(
pdb_id: str,
timeout_duration: int = 10,
max_retries: int = 5,
) -> Optional[dict[str, Any]]:
"""Retrieve information for a given PDB ID from the RCSB PDB API.
Args:
pdb_id (str): The PDB ID of the protein structure.
timeout_duration (int, optional): The maximum time in seconds to wait for
a response from the API before timing out. Defaults to 10.
max_retries (int, optional): The maximum number of times to retry the request
in case of failures. Defaults to 5.
Returns:
Optional[Dict[str, Any]]: A dictionary containing the relevant data of the
PDB entry, or None if the request fails.
"""
url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"
retries = 0

while retries < max_retries:
try:
response = requests.get(url, timeout=timeout_duration)

if response.status_code == SUCCESS:
data = response.json()

# Extract relevant data from the JSON response
resolution = data["rcsb_entry_info"].get("resolution_combined", [None])[
0
]
experimental_method = data["exptl"][0].get("method", "N/A")
citations = len(
data["rcsb_entry_container_identifiers"].get("related_pubmeds", []),
)

# Extract r_free and r_work from the refine section
r_free = "N/A"
r_work = "N/A"
if "refine" in data:
refine_data = data["refine"][
0
] # Assuming we're interested in the first refine entry
r_free = refine_data.get("ls_rfactor_rfree", "N/A")
r_work = refine_data.get("ls_rfactor_rwork", "N/A")

return {
"PDB ID": pdb_id,
"Resolution": resolution,
"R-Free": r_free,
"R-Work": r_work,
"Experimental Method": experimental_method,
"Citations": citations,
}
elif response.status_code == TOO_MANY_REQUESTS: # noqa: RET505
# Exponential backoff
wait_time = 2**retries
print( # noqa: T201
f"Received status code 429. Retrying in {wait_time} seconds...",
)
time.sleep(wait_time)
retries += 1
else:
return None
except requests.exceptions.Timeout:
print( # noqa: T201
f"Request to {url} timed out after {timeout_duration} seconds.",
)
return None
except requests.exceptions.RequestException as e:
print(f"An error occurred: {e}") # noqa: T201
return None
return None


def score_pdb_entry(entry: dict[str, Any]) -> float:
"""Calculate a score for a PDB entry based on multiple criteria.
Args:
entry (Dict[str, Any]): A dictionary containing the relevant
data of the PDB entry
Returns:
float: The total score for the PDB entry.
"""
# Define weights for each criterion
weights = {
"Resolution": 0.35,
"R-Free": 0.25,
"R-Work": 0.20,
"Citations": 0.10,
"Experimental Method": 0.10,
}

# Normalize and compute the score for each criterion
resolution_score = 1 / entry["Resolution"] if entry["Resolution"] else 0
r_free_score = 1 / entry["R-Free"] if entry["R-Free"] != "N/A" else 0
r_work_score = 1 / entry["R-Work"] if entry["R-Work"] != "N/A" else 0
citations_score = entry["Citations"]

# Score the experimental method, giving preference to X-ray diffraction
experimental_method_score = (
1 if entry["Experimental Method"].lower() == "x-ray diffraction" else 0
)

# Combine scores with weights
return (
weights["Resolution"] * resolution_score
+ weights["R-Free"] * r_free_score
+ weights["R-Work"] * r_work_score
+ weights["Citations"] * citations_score
+ weights["Experimental Method"] * experimental_method_score
)


def filter_pdbs( # noqa: PLR0913
pdb_ids: list[str],
output_txt_path: str,
min_row: int = 1,
max_row: int = -1,
timeout_duration: int = 10,
max_retries: int = 5,
) -> None:
"""Filter, score, and sort PDB entries, then save the results to a txt file.
Args:
pdb_ids (List[str]): A list of PDB IDs to process.
output_txt_path (str): The path to the output file to save the results.
min_row (int, optional): min index of rows. Defaults to 1.
max_row (int, optional): max index of rows. Defaults to -1.
timeout_duration (int, optional): The maximum time in seconds to wait for
a response from the API before timing out. Defaults to 10.
max_retries (int, optional): The maximum number of times to retry the request
in case of failures. Defaults to 5.
Returns:
None
"""
pdb_info_list: list[dict[str, Any]] = []

for pdb_id in pdb_ids:
pdb_info = fetch_pdb_data(pdb_id, timeout_duration, max_retries)
if pdb_info is not None:
pdb_info_list.append(pdb_info)

# Score each PDB entry
for entry in pdb_info_list:
entry["Score"] = score_pdb_entry(entry)

# Convert to DataFrame for easy viewing and sorting
df = pd.DataFrame(pdb_info_list)
df = df.sort_values(by="Score", ascending=False)
print(df.shape) # noqa: T201

if int(min_row) != 1 or int(max_row) != -1:
# Convert to zero-based indices while keeping the upper index
# inclusive (i.e. <=), so subtract 1 from the lower index only.
df = df[(int(min_row) - 1) : int(max_row)]
print(df) # noqa: T201

# Now restrict to the column we want
with Path.open(Path(output_txt_path), mode="w", encoding="utf-8") as f:
f.write(",".join(df["PDB ID"].dropna().to_list()) + "\n")


def score_pdb_structures( # noqa: PLR0913
input_pdbids: list[str],
output_txt_path: str,
min_row: int = 1,
max_row: int = -1,
timeout_duration: int = 10,
max_retries: int = 5,
) -> None:
"""score_pdb_structures.
Args:
input_pdbids: List of input PDBIDs to score, Type string[], File type input,
Accepted formats list[string]
min_row: The row min index, Type int
max_row: The row max index, Type int
output_txt_path: Path to the text dataset file, Type string, File type output,
Accepted formats txt
timeout_duration (int, optional): The maximum time in seconds to wait for
a response from the API before timing out. Defaults to 10.
max_retries (int, optional): The maximum number of times to retry the request
in case of failures. Defaults to 5.
Returns:
None
"""
filter_pdbs(
input_pdbids,
output_txt_path,
min_row=min_row,
max_row=max_row,
timeout_duration=timeout_duration,
max_retries=max_retries,
)
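When the RCSB API answers HTTP 429, `fetch_pdb_data` above backs off exponentially, sleeping `2**retries` seconds before each retry. A small sketch of the resulting wait schedule:

```python
def backoff_schedule(max_retries: int) -> list[int]:
    # fetch_pdb_data sleeps 2**retries seconds after each HTTP 429,
    # so with the default max_retries=5 the waits are 1, 2, 4, 8, 16.
    return [2**retry for retry in range(max_retries)]


print(backoff_schedule(5))  # [1, 2, 4, 8, 16]
```

Worst case, a single rate-limited PDB ID therefore blocks for 31 seconds of sleep on top of the per-request timeouts.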
1 change: 1 addition & 0 deletions utils/score-pdb-structures-plugin/tests/__init__.py
@@ -0,0 +1 @@
"""Tests for score_pdb_structures."""
@@ -0,0 +1,65 @@
"""Tests for score_pdb_structures."""
from pathlib import Path

from polus.mm.utils.score_pdb_structures.score_pdb_structures import (
score_pdb_structures,
)
from sophios.api.pythonapi import Step
from sophios.api.pythonapi import Workflow


def test_score_pdb_structures() -> None:
"""Test score_pdb_structures."""
input_pdbids = [
"1g0i",
"1iev",
"1ptg",
"1y7v",
"2huo",
"2os9",
"2r71",
"2x1i",
"3bxd",
"1aod",
]
score_pdb_structures(input_pdbids, "output.txt", max_row=1)

assert Path("output.txt").exists()


def test_score_pdb_structures_cwl() -> None:
"""Test score_pdb_structures CWL."""
cwl_file = Path("score_pdb_structures_0@1@0.cwl")

# Create the step for the CWL file
score_pdb_structures_step = Step(clt_path=cwl_file)
input_pdbids = [
"1g0i",
"1iev",
"1ptg",
"1y7v",
"2huo",
"2os9",
"2r71",
"2x1i",
"3bxd",
"1aod",
]

score_pdb_structures_step.input_pdbids = input_pdbids
score_pdb_structures_step.output_txt_path = "output_scored.txt"
score_pdb_structures_step.max_row = 1

# Define the workflow with the step
steps = [score_pdb_structures_step]
filename = "score_pdb_structures"
workflow = Workflow(steps, filename)

# Run the workflow
workflow.run()

# Check for the existence of the output file
outdir = Path("outdir")
assert any(
file.name == "output_scored.txt" for file in outdir.rglob("*")
), "The file output_scored.txt was not found."