From e69b48f09315201b2e83d2d60445fca6e314968e Mon Sep 17 00:00:00 2001 From: clintval Date: Thu, 19 Sep 2024 18:19:00 -0700 Subject: [PATCH 01/15] feat: use bwa-aln-interactive and upgrade developer's docs --- .github/workflows/tests.yml | 70 ++++-------- README.md | 76 ++++++++----- docs/index.md | 2 +- ...stallation-and-developers-documentation.md | 93 ++++++++++++++++ docs/installation.md | 102 ------------------ prymer.yml | 12 +-- prymer/offtarget/bwa.py | 19 +++- prymer/offtarget/offtarget_detector.py | 4 +- pyproject.toml | 5 + 9 files changed, 183 insertions(+), 200 deletions(-) create mode 100644 docs/installation-and-developers-documentation.md delete mode 100644 docs/installation.md diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9437b81..f65d370 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -32,10 +32,18 @@ jobs: with: python-version: ${{ matrix.PYTHON_VERSION }} + - name: Install poetry + run: | + python -m pip install --upgrade pip + python -m pip install poetry==${{env.POETRY_VERSION}} + + - name: Configure poetry + shell: bash + run: poetry config virtualenvs.in-project true + - name: Set up miniconda uses: conda-incubator/setup-miniconda@v3 with: - miniforge-variant: Mambaforge miniforge-version: latest channels: conda-forge,bioconda activate-environment: prymer @@ -45,61 +53,19 @@ jobs: auto-activate-base: false python-version: ${{ matrix.PYTHON_VERSION }} - - name: Install fulcrumgenomics/bwa - shell: bash -l {0} - run: | - conda activate prymer - pushd bwa - make -j $(nproc) - cp bwa ${CONDA_PREFIX}/bin - popd - - - name: Configure poetry and check lock file - shell: bash -l {0} - run: | - conda activate prymer - poetry config virtualenvs.in-project false - poetry check --lock + - name: Install the project's dependencies + shell: bash -el {0} + run: poetry install - - name: Poetry install - shell: bash -l {0} - run: | - conda activate prymer - poetry lock --no-update - poetry install --with dev - - - name: Unit tests (with doctest and coverage) - shell: bash -l {0} - run: | - conda activate prymer - poetry run pytest --cov=prymer --cov-report=xml --cov-branch --doctest-plus --doctest-modules prymer tests + - name: Test the codebase + shell: bash -el {0} + run: poetry run pytest - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v4.5.0 with: token: ${{ secrets.CODECOV_TOKEN }} - - - name: Style checking - shell: bash -l {0} - run: | - conda activate prymer - poetry run ruff format --check - - - name: Run lint - shell: bash -l {0} - run: | - conda activate prymer - poetry run ruff check - - name: Run mypy - shell: bash -l {0} - run: | - conda activate prymer - poetry run mypy - - - name: Run docs - shell: bash -l {0} - run: | - conda activate prymer - set -euo pipefail - poetry run mkdocs build --strict + - name: Test building the documentation + shell: bash -el {0} + run: set -euo pipefail && poetry run mkdocs build --strict diff --git a/README.md b/README.md index 54bc433..1def53b 100644 --- a/README.md +++ b/README.md @@ -1,44 +1,62 @@ # Python Primer Design Library [![Python Versions][language-badge]][language-link] -[![Code Style][code-style-badge]][code-style-link] -[![Type Checked][type-checking-badge]][type-checking-link] -[![PEP8][pep-8-badge]][pep-8-link] -[![Code Coverage][code-coverage-badge]][code-coverage-link] [![License][license-badge]][license-link] - ---- - -[![Install with Bioconda][bioconda-badge]][bioconda-link] -[![Bioconda][bioconda-dl-badge]][bioconda-dl-link] -[![PyPI version][pypi-badge]][pypi-link] -[![PyPI download total][pypi-downloads-badge]][pypi-downloads-link] -[![Python package][python-package-badge]][python-package-link] +[![MyPy Checked][type-checking-badge]][type-checking-link] +[![Poetry][poetry-badge]][poetry-link] +[![Ruff][ruff-badge]][ruff-link] [language-badge]: https://img.shields.io/badge/python-3.11_|_3.12-blue [language-link]: http://www.python.org/ -[code-style-badge]: https://img.shields.io/badge/code%20style-black-000000.svg -[code-style-link]: https://black.readthedocs.io/en/stable/ -[type-checking-badge]: http://www.mypy-lang.org/static/mypy_badge.svg -[type-checking-link]: http://mypy-lang.org/ -[pep-8-badge]: https://img.shields.io/badge/code%20style-pep8-brightgreen.svg -[pep-8-link]: https://www.python.org/dev/peps/pep-0008/ -[code-coverage-badge]: https://codecov.io/gh/fulcrumgenomics/prymer/branch/main/graph/badge.svg -[code-coverage-link]: https://codecov.io/gh/fulcrumgenomics/prymer [license-badge]: http://img.shields.io/badge/license-MIT-blue.svg [license-link]: https://github.com/fulcrumgenomics/prymer/blob/main/LICENSE -[bioconda-badge]: https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat +[type-checking-badge]: http://www.mypy-lang.org/static/mypy_badge.svg +[type-checking-link]: http://mypy-lang.org/ +[poetry-badge]: https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json +[poetry-link]: https://python-poetry.org/ +[ruff-badge]: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json +[ruff-link]: https://docs.astral.sh/ruff/ + +[![Install with Bioconda][bioconda-badge]][bioconda-link] +[![PyPI version][pypi-badge]][pypi-link] + +[bioconda-badge]: https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?label=Install%20with [bioconda-link]: http://bioconda.github.io/recipes/prymer/README.html -[bioconda-dl-badge]: https://img.shields.io/conda/dn/bioconda/prymer.svg?label=Bioconda -[bioconda-dl-link]: https://anaconda.org/bioconda/prymer -[pypi-badge]: https://badge.fury.io/py/prymer.svg +[pypi-badge]: https://img.shields.io/pypi/v/prymer?label=Install%20with%20PyPi [pypi-link]: https://pypi.python.org/pypi/prymer -[pypi-downloads-badge]: https://img.shields.io/pypi/dm/prymer + +[![Bioconda][bioconda-dl-badge]][bioconda-dl-link] +[![PyPI download total][pypi-downloads-badge]][pypi-downloads-link] + + +[bioconda-dl-badge]: https://img.shields.io/conda/dn/bioconda/prymer.svg?label=Bioconda%20downloads +[bioconda-dl-link]: https://anaconda.org/bioconda/prymer +[pypi-downloads-badge]: https://img.shields.io/pypi/dm/prymer.svg?label=PyPi%20downloads [pypi-downloads-link]: https://pypi.python.org/pypi/prymer -[python-package-badge]: https://github.com/fulcrumgenomics/prymer/actions/workflows/publish_prymer.yml/badge.svg -[python-package-link]: https://github.com/fulcrumgenomics/prymer/actions/workflows/publish_prymer.yml -## Quick setup +[![tests][python-tests-badge]][python-tests-link] +[![publish prymer][publish-prymer-badge]][publish-prymer-link] +[![Code Coverage][code-coverage-badge]][code-coverage-link] + +[publish-prymer-badge]: https://github.com/fulcrumgenomics/prymer/actions/workflows/publish_prymer.yml/badge.svg +[publish-prymer-link]: https://github.com/fulcrumgenomics/prymer/actions/workflows/publish_prymer.yml +[python-tests-badge]: https://github.com/fulcrumgenomics/prymer/actions/workflows/tests.yml/badge.svg +[python-tests-link]: https://github.com/fulcrumgenomics/prymer/actions/workflows/tests.yml +[code-coverage-badge]: https://codecov.io/gh/fulcrumgenomics/prymer/branch/main/graph/badge.svg +[code-coverage-link]: https://codecov.io/gh/fulcrumgenomics/prymer + +## Recommended Installation + +The package `prymer` requires installation of [Primer3](https://github.com/primer3-org/primer3) and [interactive `bwa`](https://github.com/fulcrumgenomics/bwa-aln-interactive). + +To satisfy these requirements, it is recommended to install using [bioconda](https://bioconda.github.io/): + +```console +mamba install -c bioconda prymer +``` + +## Development and Testing -See [Installation](docs/installation.md). +See the [developer's instructions][developers-instructions-link] for more information. +[developers-instructions-link]: https://prymer.readthedocs.io/en/latest/installation-and-developers-documentation.html#installation-for-development diff --git a/docs/index.md b/docs/index.md index 4b9fde1..b676f5f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,7 +4,7 @@ Python Primer Design Library ## Documentation Contents -* [Installation](installation.md) +* [Installation](installation-and-developers-documentation.md) * [Overview](overview.md) * [API](reference/prymer/index.md) diff --git a/docs/installation-and-developers-documentation.md b/docs/installation-and-developers-documentation.md new file mode 100644 index 0000000..2f849bd --- /dev/null +++ b/docs/installation-and-developers-documentation.md @@ -0,0 +1,93 @@ +# Installation and Developer's Documentation + +## Recommended Installation + +The package `prymer` requires installation of [Primer3](https://github.com/primer3-org/primer3) and [interactive `bwa`](https://github.com/fulcrumgenomics/bwa-aln-interactive). + +To satisfy these requirements, it is recommended to install using [bioconda](https://bioconda.github.io/): + +```console +mamba install -c bioconda prymer +``` + +## Installation for Development and Release + +1. Install the environment manager [`mamba`](https://mamba.readthedocs.io/en/latest/installation/mamba-installation.html) +2. Install the Python build tool [`poetry`](https://python-poetry.org/docs/#installing-with-the-official-installer) +3. Create an environment with Python, [Primer3](https://github.com/primer3-org/primer3), and [interactive `bwa`](https://github.com/fulcrumgenomics/bwa-aln-interactive): + + ```console + mamba env create -y -f prymer.yml + ``` + +4. Activate the environment: + + ```console + mamba activate prymer + ``` + +5. Configure `poetry` to install into pre-existing virtual environments: + + ```console + poetry config settings.virtualenvs.create false + ``` + +6. Install `prymer` into the virtual environment: + + ```console + poetry install + ``` + +# Checking the Build + +Use `poetry` to test your code. + +```console +poetry run pytest +``` + +Note that `poetry run pytest` will run `mypy` checks, `ruff` checks, `pytest` unit tests, and will provide a unit test coverage report. + +However, `pytest` will neither run the ruff formatter nor apply `ruff`'s automatic lint fixes, which can be done by calling `ruff` directly. + +```console +poetry run ruff format && poetry run ruff check --fix +``` + +# Building the Documentation + +Use `mkdocs` to build and serve the documentation. + +```console +poetry run mkdocs build && poetry run mkdocs serve +``` + +# Creating a Release on PyPi + +1. Clone the repository recursively and ensure you are on the `main` (un-dirty) branch +2. Checkout a new branch to prepare the library for release +3. Bump the version of the library to the desired SemVer with `poetry version #.#.#` +4. Commit the version bump changes with a Git commit message like `chore(release): bump to #.#.#` +5. Push the commit to the upstream remote, open a PR, ensure tests pass, and seek reviews +6. Squash merge the PR +7. Tag the new commit on the main branch of the origin repository with the new SemVer + +> [!NOTE] +> This project follows [Semantic Versioning](https://semver.org/). +> In brief: +> +> * `MAJOR` version when you make incompatible API changes +> * `MINOR` version when you add functionality in a backwards compatible manner +> * `PATCH` version when you make backwards compatible bug fixes + +GitHub Actions will take care of the remainder of the deployment and release process with: + +1. Unit tests will be run for safety-sake +2. A source distribution will be built +3. Multi-arch multi-Python binary distributions will be built +4. Assets will be deployed to PyPi with the new SemVer +5. A [Conventional Commit](https://www.conventionalcommits.org/en/v1.0.0/)-aware changelog will be drafted +6. A GitHub release will be created with the new SemVer and the drafted changelog + +> [!IMPORTANT] +> Consider editing the changelog if there are any errors or necessary enhancements. diff --git a/docs/installation.md b/docs/installation.md deleted file mode 100644 index e173dac..0000000 --- a/docs/installation.md +++ /dev/null @@ -1,102 +0,0 @@ -# Installation - - -## Installing `prymer` - -The installation requires three steps: - -1. Install python and other dependencies with `conda` -2. Install the custom version of bwa -3. Install `prymer` with `poetry. - -Install the required Python version, [`poetry`](https://github.com/python-poetry/poetry), and [`primer3](https://github.com/primer3-org/primer3) into your environment manager of choice, e.g. - -```sh -$ mamba env create -y -f prymer.yml -$ conda activate prymer -``` - -Install the custom version of bwa: -```sh -$ git clone -b interactive_aln git@github.com:fulcrumgenomics/bwa.git -$ cd bwa -$ make -j 12 -$ cp bwa ${CONDA_PREFIX}/bin -``` - -Note: the `virtualenvs.create false` setting in `poetry.toml` stops poetry from creating new virtual environments and forces it to use the active conda environment instead. -This can be set once per machine/user and stored in the user's poetry configuration with: - -```sh -$ poetry config settings.virtualenvs.create false -``` - -Install the prymer with `poetry`. - -```console -$ poetry install -``` - -## Getting Setup for Development Work - -Follow the [instructions above](#installing-prymer) - -```console -$ poetry install --with dev -``` - -## Checking the Build - -Make sure that [instructions for development work](#getting-setup-for-development-work) have been followed. - -Use `poetry` to format, lint, type-check, and test your code. -Note that `poetry run pytest` will run `mypy` and `ruff` code checks in addition to `pytest` unit tests, and will provide a unit test coverage report. - -```console -$ poetry run pytest -``` - -However, `pytest` will neither run the ruff formatter nor apply `ruff`'s automatic lint fixes, which can be done by calling `ruff` directly. - -```console -$ poetry run ruff format && poetry run ruff check --fix -``` - -Static type checking is performed using `mpyp`. - -```console -poetry run mypy -``` - -## Building the Documentation - -Make sure that [instructions for development work](#getting-setup-for-development-work) have been followed. - -Use `mkdocs` to build and serve the documentation. - -```console -$ poetry install --with dev -$ poetry run mkdocs build -$ poetry run mkdocs serve -``` - -## Creating a Release on PyPi - -1. Clone the repository recursively and ensure you are on the `main` (un-dirty) branch -2. Checkout a new branch to prepare the library for release -3. Bump the version of the library to the desired SemVer with `poetry version #.#.#` -4. Commit the version bump changes with a Git commit message like `chore(release): bump to #.#.#` -5. Push the commit to the upstream remote, open a PR, ensure tests pass, and seek reviews -6. Squash merge the PR -7. Tag the new commit on the main branch of the repository with the new SemVer - -GitHub Actions will take care of the remainder of the deployment and release process with: - -1. Unit tests will be run for safety-sake -2. A source distribution will be built -3. Many multi-arch multi-Python binary distributions will be built -4. Assets will be deployed to PyPi with the new SemVer -5. A [Conventional Commit](https://www.conventionalcommits.org/en/v1.0.0/)-aware changelog will be drafted -6. A GitHub release will be created with the new SemVer and the drafted changelog - -Consider editing the changelog if there are any errors or necessary enhancements. diff --git a/prymer.yml b/prymer.yml index 44d0515..817a458 100644 --- a/prymer.yml +++ b/prymer.yml @@ -3,12 +3,6 @@ channels: - bioconda - conda-forge dependencies: - # Python - - python>=3.11.* - - ruff>=0.2.1 - - mypy>=1.8 - - pytest>=8.0.0 - - pytest-workflow=2.1.0 - - poetry=1.7.1 - - primer3=2.6.1 - - pyproject_hooks=1.0.0 + - bioconda::bwa-aln-interactive=0.7.18 + - bioconda::primer3=2.6.1 + - conda-forge::python>=3.11.* diff --git a/prymer/offtarget/bwa.py b/prymer/offtarget/bwa.py index 7c4bd47..5b15bc9 100644 --- a/prymer/offtarget/bwa.py +++ b/prymer/offtarget/bwa.py @@ -15,7 +15,13 @@ hits in the "XA" tag than the total number hits reported in the "HN". This occurs when BWA finds more hits than `max_hits` (see `bwt aln -X`). - ## Example +Use of this module requires installation of a custom version of BWA named `bwa-aln-interactive`. +See: + + - https://github.com/fulcrumgenomics/bwa/tree/interactive_aln + - https://bioconda.github.io/recipes/bwa-aln-interactive/README.html + +## Example ```python >>> from pathlib import Path @@ -193,7 +199,10 @@ class BwaAlnInteractive(ExecutableRunner): the process running and be able to send it chunks of reads periodically and get alignments back without waiting for a full batch of reads to be sent. - See: https://github.com/fulcrumgenomics/bwa/tree/interactive_aln + See: + - https://bioconda.github.io/recipes/bwa-aln-interactive/README.html + - https://github.com/fulcrumgenomics/bwa/tree/interactive_aln + Attributes: max_hits: the maximum number of hits to report - if more than this number of seed hits @@ -207,7 +216,7 @@ def __init__( self, ref: Path, max_hits: int, - executable: str | Path = "bwa", + executable: str | Path = "bwa-aln-interactive", max_mismatches: int = 3, max_mismatches_in_seed: int = 3, max_gap_opens: int = 0, @@ -222,7 +231,7 @@ def __init__( ref: the path to the reference FASTA, which must be indexed with bwa. max_hits: the maximum number of hits to report - if more than this number of seed hits are found, report only the count and not each hit. - executable: string or Path representation of the `bwa` executable path + executable: string or Path representation of the `bwa-aln-interactive` executable path max_mismatches: the maximum number of mismatches allowed in the full query sequence max_mismatches_in_seed: the maximum number of mismatches allowed in the seed region max_gap_opens: the maximum number of gap opens allowed in the full query sequence @@ -252,7 +261,7 @@ def __init__( else: message = "BWA index file does not exist:\n\t" message += "\t\n".join(f"{p}" for p in missing_aux_paths) - raise FileNotFoundError(f"{message}\nPlease index with: `bwa index {ref}`") + raise FileNotFoundError(f"{message}\nPlease index with: `{executable_path} index {ref}`") # -N = non-iterative mode: search for all n-difference hits (slooow) # -S = output SAM (run samse) diff --git a/prymer/offtarget/offtarget_detector.py b/prymer/offtarget/offtarget_detector.py index 1773042..1876ee0 100644 --- a/prymer/offtarget/offtarget_detector.py +++ b/prymer/offtarget/offtarget_detector.py @@ -123,7 +123,7 @@ class OffTargetResult: class OffTargetDetector: """A class for detecting off-target mappings of primers and primer pairs that uses a custom - version of "bwa aln". + version of "bwa aln" named "bwa-aln-interactive". The off-target detection is faster and more sensitive than traditional isPCR and in addition can correctly detect primers that are repetitive and contain many thousands or millions of mappings @@ -146,7 +146,7 @@ def __init__( threads: Optional[int] = None, keep_spans: bool = True, keep_primer_spans: bool = True, - executable: str | Path = "bwa", + executable: str | Path = "bwa-aln-interactive", ) -> None: """ Args: diff --git a/pyproject.toml b/pyproject.toml index 4ccb6cb..e4a2898 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -125,6 +125,11 @@ minversion = "7.4" addopts = [ "--ignore=docs/scripts", "--color=yes", + "--cov", + "--cov-report=xml", + "--cov-branch", + "--doctest-plus", + "--doctest-modules", "--mypy", "--ruff", "--doctest-plus", From 72abb30d66337ccacf4f71a5e13ccee5258fe52c Mon Sep 17 00:00:00 2001 From: clintval Date: Thu, 19 Sep 2024 21:53:21 -0700 Subject: [PATCH 02/15] feat: remove 'hack' --- prymer/offtarget/bwa.py | 33 ++++++++++++++++----------------- pyproject.toml | 1 - 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/prymer/offtarget/bwa.py b/prymer/offtarget/bwa.py index 5b15bc9..514b0d3 100644 --- a/prymer/offtarget/bwa.py +++ b/prymer/offtarget/bwa.py @@ -51,10 +51,10 @@ from typing import cast import pysam -from fgpyo import sam from fgpyo import sequence from fgpyo.sam import Cigar from pysam import AlignedSegment +from pysam import AlignmentHeader from prymer.api import coordmath from prymer.util.executable_runner import ExecutableRunner @@ -261,7 +261,7 @@ def __init__( else: message = "BWA index file does not exist:\n\t" message += "\t\n".join(f"{p}" for p in missing_aux_paths) - raise FileNotFoundError(f"{message}\nPlease index with: `{executable_path} index {ref}`") + raise FileNotFoundError(f"{message}\nIndex with: `{executable_path} index {ref}`") # -N = non-iterative mode: search for all n-difference hits (slooow) # -S = output SAM (run samse) @@ -293,20 +293,18 @@ def __init__( super().__init__(command=command) - # HACK ALERT - # This is a hack. By trial and error, pysam.AlignmentFile() will block reading unless - # there's at least a few records due to htslib wanting to read a few records for format - # auto-detection. Lame. So a hundred queries are sent to the aligner to align enable the - # htslib auto-detection to complete, and for us to be able to read using pysam. - num_warmup: int = 100 - for i in range(num_warmup): - query = Query(id=f"ignoreme:{i}", bases="A" * 100) - fastq_str = query.to_fastq(reverse_complement=self.reverse_complement) - self._subprocess.stdin.write(fastq_str) + # Send in a single record to be aligned so we get bwa to output a SAM header. + self._subprocess.stdin.write(Query(id="ignore", bases="A").to_fastq()) self.__signal_bwa() # forces the input to be sent to the underlying process. - self._reader = sam.reader(path=self._subprocess.stdout, file_type=sam.SamFileType.SAM) - for _ in range(num_warmup): - next(self._reader) + + header = [] + for line in self._subprocess.stdout: + if line.startswith("@"): + header.append(line) + if line.startswith("ignore"): + break + + self._header = AlignmentHeader.from_text("".join(header)) def __signal_bwa(self) -> None: """Signals BWA to process the queries""" @@ -349,7 +347,9 @@ def map_all(self, queries: list[Query]) -> list[BwaResult]: results: list[BwaResult] = [] for query in queries: # get the next alignment and convert to a result - results.append(self._to_result(query=query, rec=next(self._reader))) + line: str = next(self._subprocess.stdout).strip() + alignment = AlignedSegment.fromstring(line, self._header) + results.append(self._to_result(query=query, rec=alignment)) return results @@ -423,5 +423,4 @@ def to_hits(self, rec: AlignedSegment) -> list[BwaHit]: return hits def close(self) -> None: - self._reader.close() super().close() diff --git a/pyproject.toml b/pyproject.toml index e4a2898..65a7823 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -154,5 +154,4 @@ ignore = ["E203", "E701"] unfixable = ["B"] [tool.ruff.lint.isort] - force-single-line = true From d504f3081deb4dd495be38dd5f17e348a20fb728 Mon Sep 17 00:00:00 2001 From: clintval Date: Thu, 3 Oct 2024 17:01:37 -0700 Subject: [PATCH 03/15] fix: use more newlines for the Bioconda bwa-aln-interactive --- prymer/offtarget/bwa.py | 14 ++++---- prymer/offtarget/offtarget_detector.py | 3 +- tests/api/test_picking.py | 47 ++++++++++++-------------- 3 files changed, 31 insertions(+), 33 deletions(-) diff --git a/prymer/offtarget/bwa.py b/prymer/offtarget/bwa.py index 514b0d3..7645fbb 100644 --- a/prymer/offtarget/bwa.py +++ b/prymer/offtarget/bwa.py @@ -210,6 +210,7 @@ class BwaAlnInteractive(ExecutableRunner): reverse_complement: reverse complement each query sequence before alignment. include_alt_hits: if True include hits to references with names ending in _alt, otherwise do not include them. + header: the SAM alignment header. """ def __init__( @@ -293,7 +294,7 @@ def __init__( super().__init__(command=command) - # Send in a single record to be aligned so we get bwa to output a SAM header. + # Send a sentinel record to be aligned so we know when bwa is done outputting a header. self._subprocess.stdin.write(Query(id="ignore", bases="A").to_fastq()) self.__signal_bwa() # forces the input to be sent to the underlying process. @@ -304,14 +305,13 @@ def __init__( if line.startswith("ignore"): break - self._header = AlignmentHeader.from_text("".join(header)) + self.header = AlignmentHeader.from_text("".join(header)) def __signal_bwa(self) -> None: """Signals BWA to process the queries""" - for _ in range(3): - self._subprocess.stdin.flush() - self._subprocess.stdin.write("\n\n") - self._subprocess.stdin.flush() + self._subprocess.stdin.flush() + self._subprocess.stdin.write("\n" * 16) + self._subprocess.stdin.flush() def map_one(self, query: str, id: str = "unknown") -> BwaResult: """Maps a single query to the genome and returns the result. @@ -348,7 +348,7 @@ def map_all(self, queries: list[Query]) -> list[BwaResult]: for query in queries: # get the next alignment and convert to a result line: str = next(self._subprocess.stdout).strip() - alignment = AlignedSegment.fromstring(line, self._header) + alignment = AlignedSegment.fromstring(line, self.header) results.append(self._to_result(query=query, rec=alignment)) return results diff --git a/prymer/offtarget/offtarget_detector.py b/prymer/offtarget/offtarget_detector.py index 1876ee0..dcb510a 100644 --- a/prymer/offtarget/offtarget_detector.py +++ b/prymer/offtarget/offtarget_detector.py @@ -75,6 +75,7 @@ """ # noqa: E501 import itertools +from contextlib import AbstractContextManager from dataclasses import dataclass from dataclasses import field from dataclasses import replace @@ -121,7 +122,7 @@ class OffTargetResult: right_primer_spans: list[Span] = field(default_factory=list) -class OffTargetDetector: +class OffTargetDetector(AbstractContextManager): """A class for detecting off-target mappings of primers and primer pairs that uses a custom version of "bwa aln" named "bwa-aln-interactive". diff --git a/tests/api/test_picking.py b/tests/api/test_picking.py index fae8179..9d58c26 100644 --- a/tests/api/test_picking.py +++ b/tests/api/test_picking.py @@ -573,31 +573,28 @@ def _pick_top_primer_pairs( max_primer_pair_hits: int, min_difference: int = 1, ) -> list[PrimerPair]: - offtarget_detector = OffTargetDetector( - ref=picking_ref, - max_primer_hits=max_primer_hits, - max_primer_pair_hits=max_primer_pair_hits, - three_prime_region_length=5, - max_mismatches_in_three_prime_region=0, - max_mismatches=0, - max_amplicon_size=params.amplicon_sizes.max, - ) - dimer_checker = NtThermoAlign() - - picked = pick_top_primer_pairs( - primer_pairs=primer_pairs, - num_primers=len(primer_pairs), - min_difference=min_difference, - params=params, - offtarget_detector=offtarget_detector, - is_dimer_tm_ok=lambda s1, s2: ( - dimer_checker.duplex_tm(s1=s1, s2=s2) <= params.max_dimer_tm - ), - ) - offtarget_detector.close() - dimer_checker.close() - - return picked + with ( + OffTargetDetector( + ref=picking_ref, + max_primer_hits=max_primer_hits, + max_primer_pair_hits=max_primer_pair_hits, + three_prime_region_length=5, + max_mismatches_in_three_prime_region=0, + max_mismatches=0, + max_amplicon_size=params.amplicon_sizes.max, + ) as offtarget_detector, + NtThermoAlign() as dimer_checker, + ): + return pick_top_primer_pairs( + primer_pairs=primer_pairs, + num_primers=len(primer_pairs), + min_difference=min_difference, + params=params, + offtarget_detector=offtarget_detector, + is_dimer_tm_ok=lambda s1, s2: ( + dimer_checker.duplex_tm(s1=s1, s2=s2) <= params.max_dimer_tm + ), + ) _PARAMS: FilteringParams = _zero_score_filtering_params(_score_input()) From 85bff5896258a84dcaa50e07380feffa0d524598 Mon Sep 17 00:00:00 2001 From: clintval Date: Fri, 4 Oct 2024 10:50:22 -0700 Subject: [PATCH 04/15] chore: add back GHA checkout --- .github/workflows/tests.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cf4afd1..1a85b16 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -18,6 +18,8 @@ jobs: matrix: PYTHON_VERSION: ["3.11", "3.12"] steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.PYTHON_VERSION }} uses: actions/setup-python@v5 with: @@ -36,7 +38,7 @@ jobs: uses: conda-incubator/setup-miniconda@v3 with: miniforge-version: latest - channels: conda-forge,bioconda + channels: bioconda,conda-forge activate-environment: prymer environment-file: prymer.yml channel-priority: true From 1a4578257f739fec0c77bb3d348524cbcfccb2ff Mon Sep 17 00:00:00 2001 From: clintval Date: Fri, 4 Oct 2024 10:54:25 -0700 Subject: [PATCH 05/15] chore: update stale URLs in docs --- prymer/offtarget/bwa.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prymer/offtarget/bwa.py b/prymer/offtarget/bwa.py index a6c0f7d..61500d1 100644 --- a/prymer/offtarget/bwa.py +++ b/prymer/offtarget/bwa.py @@ -18,7 +18,7 @@ Use of this module requires installation of a custom version of BWA named `bwa-aln-interactive`. See: - - https://github.com/fulcrumgenomics/bwa/tree/interactive_aln + - https://github.com/fulcrumgenomics/bwa-aln-interactive - https://bioconda.github.io/recipes/bwa-aln-interactive/README.html ## Example @@ -201,8 +201,8 @@ class BwaAlnInteractive(ExecutableRunner): back without waiting for a full batch of reads to be sent. See: + - https://github.com/fulcrumgenomics/bwa-aln-interactive - https://bioconda.github.io/recipes/bwa-aln-interactive/README.html - - https://github.com/fulcrumgenomics/bwa/tree/interactive_aln Attributes: From 6d46ea7a5df4c4b85ac61fb4adfd8df876f0c21d Mon Sep 17 00:00:00 2001 From: clintval Date: Fri, 4 Oct 2024 11:16:43 -0700 Subject: [PATCH 06/15] chore: fix up Markdown rendering --- .gitignore | 1 + docs/installation-and-developers-documentation.md | 7 +++---- prymer/offtarget/bwa.py | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index b745cbd..40d7f14 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.DS_Store .vscode/ # Byte-compiled / optimized / DLL files diff --git a/docs/installation-and-developers-documentation.md b/docs/installation-and-developers-documentation.md index 2f849bd..b3d0229 100644 --- a/docs/installation-and-developers-documentation.md +++ b/docs/installation-and-developers-documentation.md @@ -38,7 +38,7 @@ mamba install -c bioconda prymer poetry install ``` -# Checking the Build +## Checking the Build Use `poetry` to test your code. @@ -47,14 +47,13 @@ poetry run pytest ``` Note that `poetry run pytest` will run `mypy` checks, `ruff` checks, `pytest` unit tests, and will provide a unit test coverage report. - However, `pytest` will neither run the ruff formatter nor apply `ruff`'s automatic lint fixes, which can be done by calling `ruff` directly. ```console poetry run ruff format && poetry run ruff check --fix ``` -# Building the Documentation +## Building the Documentation Use `mkdocs` to build and serve the documentation. @@ -62,7 +61,7 @@ Use `mkdocs` to build and serve the documentation. poetry run mkdocs build && poetry run mkdocs serve ``` -# Creating a Release on PyPi +## Creating a Release on PyPi 1. Clone the repository recursively and ensure you are on the `main` (un-dirty) branch 2. Checkout a new branch to prepare the library for release diff --git a/prymer/offtarget/bwa.py b/prymer/offtarget/bwa.py index 61500d1..2c1cd3b 100644 --- a/prymer/offtarget/bwa.py +++ b/prymer/offtarget/bwa.py @@ -18,8 +18,8 @@ Use of this module requires installation of a custom version of BWA named `bwa-aln-interactive`. See: - - https://github.com/fulcrumgenomics/bwa-aln-interactive - - https://bioconda.github.io/recipes/bwa-aln-interactive/README.html +- [https://github.com/fulcrumgenomics/bwa-aln-interactive](https://github.com/fulcrumgenomics/bwa-aln-interactive) +- [https://bioconda.github.io/recipes/bwa-aln-interactive/README.html](https://bioconda.github.io/recipes/bwa-aln-interactive/README.html) ## Example @@ -192,7 +192,7 @@ class BwaResult: """The default length of the seed region""" BWA_AUX_EXTENSIONS: list[str] = [".amb", ".ann", ".bwt", ".pac", ".sa"] -"""The file extensiosn for BWA index files""" +"""The file extensions for BWA index files""" class BwaAlnInteractive(ExecutableRunner): @@ -201,9 +201,9 @@ class BwaAlnInteractive(ExecutableRunner): back without waiting for a full batch of reads to be sent. See: - - https://github.com/fulcrumgenomics/bwa-aln-interactive - - https://bioconda.github.io/recipes/bwa-aln-interactive/README.html + - [https://github.com/fulcrumgenomics/bwa-aln-interactive](https://github.com/fulcrumgenomics/bwa-aln-interactive) + - [https://bioconda.github.io/recipes/bwa-aln-interactive/README.html](https://bioconda.github.io/recipes/bwa-aln-interactive/README.html) Attributes: max_hits: the maximum number of hits to report - if more than this number of seed hits From 45c361ce89c56d8d80c296e5c05c68367814b957 Mon Sep 17 00:00:00 2001 From: clintval Date: Mon, 7 Oct 2024 11:57:23 -0700 Subject: [PATCH 07/15] docs: change to new poetry syntax --- docs/installation-and-developers-documentation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/installation-and-developers-documentation.md b/docs/installation-and-developers-documentation.md index b3d0229..2294d3c 100644 --- a/docs/installation-and-developers-documentation.md +++ b/docs/installation-and-developers-documentation.md @@ -29,7 +29,7 @@ mamba install -c bioconda prymer 5. Configure `poetry` to install into pre-existing virtual environments: ```console - poetry config settings.virtualenvs.create false + poetry config virtualenvs.create false ``` 6. Install `prymer` into the virtual environment: From d9f8baccd47d0ac05ad7848bb1e0152a8da128d4 Mon Sep 17 00:00:00 2001 From: clintval Date: Mon, 7 Oct 2024 12:45:34 -0700 Subject: [PATCH 08/15] chore: try out poetry install GHA --- .github/workflows/tests.yml | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1a85b16..24a3c0e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -25,14 +25,11 @@ jobs: with: python-version: ${{ matrix.PYTHON_VERSION }} - - name: Install poetry - run: | - python -m pip install --upgrade pip - python -m pip install poetry==${{env.POETRY_VERSION}} - - - name: Configure poetry - shell: bash - run: poetry config virtualenvs.in-project true + - name: Install and configure Poetry + uses: snok/install-poetry@v1 + with: + version: ${{env.POETRY_VERSION}} + installer-parallel: true - name: Set up miniconda uses: conda-incubator/setup-miniconda@v3 @@ -61,4 +58,4 @@ jobs: - name: Test building the documentation shell: bash -el {0} - run: set -euo pipefail && poetry run mkdocs build --strict + run: poetry run mkdocs build --strict From a7cf267766595359656ffd0d119ef363af56963b Mon Sep 17 00:00:00 2001 From: clintval Date: Mon, 7 Oct 2024 12:56:03 -0700 Subject: [PATCH 09/15] chore: make it clear poetry installs outside the mamba env --- .github/workflows/tests.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 24a3c0e..f7b5ea1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -31,6 +31,9 @@ jobs: version: ${{env.POETRY_VERSION}} installer-parallel: true + - name: Install the project's dependencies + run: poetry install + - name: Set up miniconda uses: conda-incubator/setup-miniconda@v3 with: @@ -43,10 +46,6 @@ jobs: auto-activate-base: false python-version: ${{ matrix.PYTHON_VERSION }} - - name: Install the project's dependencies - shell: bash -el {0} - run: poetry install - - name: Test the codebase shell: bash -el {0} run: poetry run pytest From 11b5f9eb8778fd4b400f6b3d9aba2ee2a4c4c878 Mon Sep 17 00:00:00 2001 From: clintval Date: Mon, 7 Oct 2024 12:58:09 -0700 Subject: [PATCH 10/15] fix: revert order of install --- .github/workflows/tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f7b5ea1..5be102f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -31,9 +31,6 @@ jobs: version: ${{env.POETRY_VERSION}} installer-parallel: true - - name: Install the project's dependencies - run: poetry install - - name: Set up miniconda uses: conda-incubator/setup-miniconda@v3 with: @@ -46,6 +43,9 @@ jobs: auto-activate-base: false python-version: ${{ matrix.PYTHON_VERSION }} + - name: Install the project's dependencies + run: poetry install + - name: Test the codebase shell: bash -el {0} run: poetry run pytest From 5cae4ad1402749558006af40e1c15f842e656a74 Mon Sep 17 00:00:00 2001 From: clintval Date: Mon, 7 Oct 2024 12:59:23 -0700 Subject: [PATCH 11/15] fix: use bash shell with conda --- .github/workflows/tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5be102f..24a3c0e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -44,6 +44,7 @@ jobs: python-version: ${{ matrix.PYTHON_VERSION }} - name: Install the project's dependencies + shell: bash -el {0} run: poetry install - name: Test the codebase From a6205c938270f097076e3f77ad9934381fc500cc Mon Sep 17 00:00:00 2001 From: clintval Date: Mon, 7 Oct 2024 13:01:22 -0700 Subject: [PATCH 12/15] docs: add a line comment about newlines --- prymer/offtarget/bwa.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/prymer/offtarget/bwa.py b/prymer/offtarget/bwa.py index 2c1cd3b..e34ebe8 100644 --- a/prymer/offtarget/bwa.py +++ b/prymer/offtarget/bwa.py @@ -305,8 +305,10 @@ def __init__( self.header = AlignmentHeader.from_text("".join(header)) def __signal_bwa(self) -> None: - """Signals BWA to process the queries""" + """Signals BWA to process the queries.""" self._subprocess.stdin.flush() + # NB: the executable compiled on different platforms require a different number of newlines + # NB: it is not currently understood why this is, but 16 spaces seems to work for all tested self._subprocess.stdin.write("\n" * 16) self._subprocess.stdin.flush() From 916ec9284c343b02d5d19bf3886f4625c1ef11ee Mon Sep 17 00:00:00 2001 From: clintval Date: Tue, 15 Oct 2024 16:14:58 -0700 Subject: [PATCH 13/15] chore: satisfy the coderabbit --- prymer/offtarget/bwa.py | 5 ++++- prymer/offtarget/offtarget_detector.py | 3 ++- pyproject.toml | 2 -- tests/api/test_picking.py | 3 +++ tests/offtarget/test_offtarget.py | 2 ++ 5 files changed, 11 insertions(+), 4 deletions(-) diff --git a/prymer/offtarget/bwa.py b/prymer/offtarget/bwa.py index e34ebe8..76599bb 100644 --- a/prymer/offtarget/bwa.py +++ b/prymer/offtarget/bwa.py @@ -60,6 +60,9 @@ from prymer.api import coordmath from prymer.util.executable_runner import ExecutableRunner +BWA_EXECUTABLE_NAME: str = "bwa-aln-interactive" +"""The executable name for the interactive build of bwa aln.""" + @dataclass(init=True, frozen=True) class Query: @@ -218,7 +221,7 @@ def __init__( self, ref: Path, max_hits: int, - executable: str | Path = "bwa-aln-interactive", + executable: str | Path = BWA_EXECUTABLE_NAME, max_mismatches: int = 3, max_mismatches_in_seed: int = 3, max_gap_opens: int = 0, diff --git a/prymer/offtarget/offtarget_detector.py b/prymer/offtarget/offtarget_detector.py index 9d0d3ed..0a09b93 100644 --- a/prymer/offtarget/offtarget_detector.py +++ b/prymer/offtarget/offtarget_detector.py @@ -90,6 +90,7 @@ from prymer.api.primer import Primer from prymer.api.primer_pair import PrimerPair from prymer.api.span import Span +from prymer.offtarget.bwa import BWA_EXECUTABLE_NAME from prymer.offtarget.bwa import BwaAlnInteractive from prymer.offtarget.bwa import BwaHit from prymer.offtarget.bwa import BwaResult @@ -164,7 +165,7 @@ def __init__( threads: Optional[int] = None, keep_spans: bool = True, keep_primer_spans: bool = True, - executable: str | Path = "bwa-aln-interactive", + executable: str | Path = BWA_EXECUTABLE_NAME, ) -> None: """ Initialize an [[OffTargetDetector]]. diff --git a/pyproject.toml b/pyproject.toml index 4955013..049ed72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -128,8 +128,6 @@ addopts = [ "--cov", "--cov-report=xml", "--cov-branch", - "--doctest-plus", - "--doctest-modules", "--mypy", "--ruff", "--doctest-plus", diff --git a/tests/api/test_picking.py b/tests/api/test_picking.py index 3018155..4ad2837 100644 --- a/tests/api/test_picking.py +++ b/tests/api/test_picking.py @@ -27,6 +27,7 @@ from prymer.api.picking import score as picking_score from prymer.ntthal import NtThermoAlign from prymer.offtarget import OffTargetDetector +from prymer.offtarget.bwa import BWA_EXECUTABLE_NAME @pytest.fixture @@ -583,6 +584,7 @@ def _pick_top_primer_pairs( max_mismatches_in_three_prime_region=0, max_mismatches=0, max_amplicon_size=params.amplicon_sizes.max, + executable=BWA_EXECUTABLE_NAME, ) as offtarget_detector, ): picked: list[PrimerPair] = pick_top_primer_pairs( @@ -890,6 +892,7 @@ def test_and_pick_primer_pairs( max_mismatches_in_three_prime_region=0, max_mismatches=0, max_amplicon_size=params.amplicon_sizes.max, + executable=BWA_EXECUTABLE_NAME, ) with pysam.FastaFile(f"{picking_ref}") as fasta: diff --git a/tests/offtarget/test_offtarget.py b/tests/offtarget/test_offtarget.py index aabd2c2..e3b177d 100644 --- a/tests/offtarget/test_offtarget.py +++ b/tests/offtarget/test_offtarget.py @@ -12,6 +12,7 @@ from prymer.offtarget import BwaResult from prymer.offtarget import OffTargetDetector from prymer.offtarget import OffTargetResult +from prymer.offtarget.bwa import BWA_EXECUTABLE_NAME def _build_detector( @@ -36,6 +37,7 @@ def _build_detector( cache_results=cache_results, keep_spans=True, keep_primer_spans=True, + executable=BWA_EXECUTABLE_NAME, ) From 9976757813c58f2140f6823c347a675dcdee258a Mon Sep 17 00:00:00 2001 From: Clint Valentine Date: Wed, 16 Oct 2024 07:48:28 -0700 Subject: [PATCH 14/15] Update prymer/offtarget/bwa.py Co-authored-by: Tim Fennell --- prymer/offtarget/bwa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prymer/offtarget/bwa.py b/prymer/offtarget/bwa.py index 76599bb..9f0f102 100644 --- a/prymer/offtarget/bwa.py +++ b/prymer/offtarget/bwa.py @@ -311,7 +311,7 @@ def __signal_bwa(self) -> None: """Signals BWA to process the queries.""" self._subprocess.stdin.flush() # NB: the executable compiled on different platforms require a different number of newlines - # NB: it is not currently understood why this is, but 16 spaces seems to work for all tested + # NB: it is not currently understood why this is, but 16 newlines seems to work for all tested self._subprocess.stdin.write("\n" * 16) self._subprocess.stdin.flush() From d997e1318248e78b5303c8dd7b0e26a67d79039e Mon Sep 17 00:00:00 2001 From: clintval Date: Wed, 16 Oct 2024 07:51:24 -0700 Subject: [PATCH 15/15] chore: fix line limit lint --- prymer/offtarget/bwa.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prymer/offtarget/bwa.py b/prymer/offtarget/bwa.py index 9f0f102..54f74ae 100644 --- a/prymer/offtarget/bwa.py +++ b/prymer/offtarget/bwa.py @@ -310,8 +310,8 @@ def __init__( def __signal_bwa(self) -> None: """Signals BWA to process the queries.""" self._subprocess.stdin.flush() - # NB: the executable compiled on different platforms require a different number of newlines - # NB: it is not currently understood why this is, but 16 newlines seems to work for all tested + # NB: the executable compiled on different platforms requires a different number of newlines + # NB: it is not understood why, but 16 newlines seems to work for all platforms tested self._subprocess.stdin.write("\n" * 16) self._subprocess.stdin.flush()