diff --git a/.ebextensions/01_download_data.config b/.ebextensions/01_download_data.config index b2ec402..8ee01fd 100644 --- a/.ebextensions/01_download_data.config +++ b/.ebextensions/01_download_data.config @@ -3,46 +3,24 @@ commands: command: "yum install -y python-devel postgresql-devel" 02_install_aws_cli: command: "yum install -y awscli" - 03_install_unzip: - command: "yum install -y unzip" - 04_eb_packages: + 03_eb_packages: command: "/var/app/venv/staging-LQM1lest/bin/pip install uvloop websockets httptools typing-extensions" - 05_export_eb_env_var: + 04_export_eb_env_var: command: "export $(cat /opt/elasticbeanstalk/deployment/env | xargs)" container_commands: - 01_uta_permissions: - test: test -d "/var/app/venv/staging-LQM1lest/lib/python3.8/site-packages/uta_tools" - command: "chmod -R 777 /var/app/venv/staging-LQM1lest/lib/python3.8/site-packages/uta_tools/data" + 01_s3_download: + test: test ! -d "/usr/local/share/seqrepo/2024-02-20" + command: "aws s3 cp s3://${AWS_BUCKET_NAME}/${AWS_SEQREPO_OBJECT} /usr/local/share/seqrepo.tar.gz --region us-east-2" - 02_s3_download: - test: test ! 
-d "/usr/local/share/seqrepo" - command: "aws s3 cp s3://${AWS_BUCKET_NAME}/${AWS_SEQREPO_OBJECT} /usr/local/share/seqrepo.zip --region us-east-2" + 02_extract_seqrepo: + test: test -f "/usr/local/share/seqrepo.tar.gz" + command: "mkdir -p /usr/local/share/seqrepo/2024-02-20 && tar -xzvf /usr/local/share/seqrepo.tar.gz -C /usr/local/share/seqrepo/2024-02-20" - 03_unzip_seqrepo: - test: test -f "/usr/local/share/seqrepo.zip" - command: "unzip /usr/local/share/seqrepo.zip -d /usr/local/share" + 03_seqrepo_zip_permission: + test: test -f "/usr/local/share/seqrepo.tar.gz" + command: "chmod +wr /usr/local/share/seqrepo.tar.gz" - 04_seqrepo_permission: - test: test -d "/usr/local/share/seqrepo" - command: "chmod -R 777 /usr/local/share/seqrepo" - - 05_macosx_permission: - test: test -d "/usr/local/share/__MACOSX" - command: "chmod -R +wr /usr/local/share/__MACOSX" - - 06_seqrepo_zip_permission: - test: test -f "/usr/local/share/seqrepo.zip" - command: "chmod +wr /usr/local/share/seqrepo.zip" - - 07_remove_macosx: - test: test -d "/usr/local/share/__MACOSX" - command: "rm -R /usr/local/share/__MACOSX" - - 08_remove_seqrepo_zip: - test: test -f "/usr/local/share/seqrepo.zip" - command: "rm /usr/local/share/seqrepo.zip" - - 9_data_permission: - test: test -d "/usr/local/share/seqrepo" - command: "chmod -R +wrx /usr/local/share/seqrepo" \ No newline at end of file + 04_remove_seqrepo_zip: + test: test -f "/usr/local/share/seqrepo.tar.gz" + command: "rm /usr/local/share/seqrepo.tar.gz" diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..712fa99 --- /dev/null +++ b/.flake8 @@ -0,0 +1,12 @@ +[flake8] +ignore = E129,E133,E203,E221,E241,E251,E303,E266,H106,H904,W291 +max-line-length = 100 +max-complexity = 15 +hang-closing = true +exclude = + .eggs + .tox + build + dist + docs/conf.py + tests/* diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..a1c7aec --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,3 @@ +# 
.git-blame-ignore-revs +# Initial formatting with Ruff +2b56cd434a129fe0ab1311f7fe9a65767ed88b9e diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..f3e706a --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @GenomicMedLab/wagner-lab-developers diff --git a/.github/labels.yml b/.github/labels.yml new file mode 100644 index 0000000..070fda8 --- /dev/null +++ b/.github/labels.yml @@ -0,0 +1,3 @@ +# file must contain an array, which may be empty + +[] diff --git a/.github/workflows/build-and-push-image.yml b/.github/workflows/build-and-push-image.yml new file mode 100644 index 0000000..cadbd5e --- /dev/null +++ b/.github/workflows/build-and-push-image.yml @@ -0,0 +1,48 @@ +# https://josephrodriguezg.medium.com/build-and-publish-docker-images-with-github-actions + + +name: Build and publish a Docker image +on: + push: + branches: + - 'main' + tags: + - '[0-9]+.[0-9]+.[0-9]+' + - '[0-9]+.[0-9]+.[0-9]+rc[0-9]+' +jobs: + build: + name: Build & push docker image + runs-on: ubuntu-latest + env: + IMG_NAME: ${{ github.repository }} + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Debug + run: | + echo "github.ref -> {{ github.ref }}" + + - name: Docker metadata + id: metadata + uses: docker/metadata-action@v3 + with: + images: ${{ env.IMG_NAME }} + tags: | + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=raw,value={{sha}},enable=${{ github.ref_type != 'tag' }} + - name: Log in to Docker Hub + uses: docker/login-action@v1 + if: ${{ github.ref_type == 'tag' }} + with: + username: ${{ vars.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + + - name: Build and push Docker image + uses: docker/build-push-action@v2 + with: + context: . 
+ push: ${{ github.event.base_ref =='refs/heads/main' && github.ref_type == 'tag' }} + tags: ${{ steps.metadata.outputs.tags }} + labels: ${{ steps.metadata.outputs.labels }} diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml new file mode 100644 index 0000000..52c578c --- /dev/null +++ b/.github/workflows/labels.yml @@ -0,0 +1,29 @@ +name: Sync labels +on: + workflow_dispatch: + push: + branches: + - 'main' + paths: + - '.github/labels.yml' + - '.github/workflows/labels.yml' + +permissions: + issues: write + +jobs: + labels: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + sparse-checkout: .github/labels.yml + + - uses: EndBug/label-sync@v2 + with: + config-file: | + https://raw.githubusercontent.com/biocommons/.github/main/etc/labels.yml + .github/labels.yml + + delete-other-labels: false \ No newline at end of file diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 0000000..bc116cd --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,68 @@ +name: Python package + +on: + push: + +jobs: + cqa: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - uses: awalsh128/cache-apt-pkgs-action@latest + with: + packages: zsh + version: 1.0 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.11" + cache: pip + cache-dependency-path: '**/pyproject.yaml' + + - name: Install test dependencies + run: | + python -m pip install --upgrade pip + pip install --use-deprecated=legacy-resolver -e '.[dev]' + + - name: Lint with Ruff + run: | + ruff check . + + - name: Format check with Ruff + run: | + ruff format --check . 
+ + test: + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11"] + + steps: + - uses: actions/checkout@v4 + + - uses: awalsh128/cache-apt-pkgs-action@latest + with: + packages: zsh + version: 1.0 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: pip + cache-dependency-path: '**/pyproject.yaml' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + make develop + + - name: Test with pytest + run: | + make test diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 394ea69..d7a6cd4 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -1,24 +1,11 @@ -name: Mark stale issues and pull requests +# https://github.com/actions/stale +name: 'Close stale issues and PRs' on: + workflow_dispatch: schedule: - - cron: "30 1 * * *" + - cron: '1 1 * * *' jobs: stale: - - runs-on: ubuntu-latest - - steps: - - uses: actions/stale@v1 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - stale-issue-label: Stale - stale-issue-message: >- - This issue was marked stale due to inactivity. - stale-pr-label: Stale - stale-pr-message: >- - This pull request was marked stale due to inactivity. 
- days-before-stale: 60 - days-before-close: 14 - exempt-issue-labels: "Stayin' Alive" + uses: biocommons/.github/.github/workflows/stale.yml@main diff --git a/.gitignore b/.gitignore index 1678dbe..f3419be 100644 --- a/.gitignore +++ b/.gitignore @@ -1,108 +1,22 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -env/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -*.egg-info/ -.installed.cfg +*.bak *.egg - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*,cover -.hypothesis/ - -# Translations -*.mo -*.pot - -# Django stuff: +*.egg-info *.log -local_settings.py - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# IPython Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# dotenv -.env - -# virtualenv -venv/ -ENV/ - -# Spyder project settings -.spyderproject - -# Rope project settings -.ropeproject - -# PyCharm -.idea - -*~ -*.bak +*.manifest *.orig -doc/_build +*.sqlite3 +*~ +.cache +.coverage .eggs -archive -misc/docker -.pytest_cache - -doc/changelog/*/.tags -doc/changelog/*/.tags.mk -doc/changelog/*/hg-git-remap.pl +.idea +.ipynb_checkpoints +.tox .vscode -web_* -refget-compliance-suite/ +__pycache__ +archive +build +dist +docs/_build +nosetests.xml +venv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..da36626 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,6 @@ +- repo: 
https://github.com/astral-sh/ruff-pre-commit + rev: v0.4.4 + hooks: + - id: ruff-format + args: [ --check ] + diff --git a/Dockerfile b/Dockerfile index 1d0e94c..d73be94 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,8 @@ -FROM ubuntu:20.04 +# syntax = docker/dockerfile:experimental + +# docker build -t biocommons/seqrepo-rest-service . + +FROM ubuntu:22.04 RUN apt update && apt upgrade -y && apt install -y \ curl \ @@ -6,11 +10,11 @@ RUN apt update && apt upgrade -y && apt install -y \ python3-pip \ zip -RUN pip3 install -U setuptools pip +RUN --mount=type=cache,target=/root/.cache/pip pip3 install -U setuptools pip COPY . /app/ WORKDIR /app -RUN python3 setup.py install +RUN --mount=type=cache,target=/root/.cache/pip pip install -e . EXPOSE 5000 diff --git a/Makefile b/Makefile index 29a52d1..aa920fc 100644 --- a/Makefile +++ b/Makefile @@ -5,17 +5,15 @@ .PRECIOUS: .SUFFIXES: -SHELL:=/bin/bash -e -o pipefail +SHELL:=zsh -eu -o pipefail -o null_glob SELF:=$(firstword $(MAKEFILE_LIST)) -PKG=biocommons.seqrepo -PKGD=$(subst .,/,${PKG}) -PYV:=3.8 -VEDIR=venv/${PYV} +VE_DIR=venv TEST_DIRS:=tests -DOC_TESTS:=doc hgvs ./README.rst +DOC_TESTS:=src ./README.md +export DOCKER_BUILDKIT=1 ############################################################################ #= BASIC USAGE @@ -32,34 +30,44 @@ help: #=> devready: create venv, install prerequisites, install pkg in develop mode .PHONY: devready devready: - make ${VEDIR} && source ${VEDIR}/bin/activate && make develop + make ${VE_DIR} && source ${VE_DIR}/bin/activate && make develop @echo '#################################################################################' - @echo '### Do not forget to `source ${VEDIR}/bin/activate` to use this environment ###' + @echo '### Do not forget to `source ${VE_DIR}/bin/activate` to use this environment ###' @echo '#################################################################################' #=> venv: make a Python 3 virtual environment -venv/%: - python$* -mvenv $@; \ 
+${VE_DIR}: + python3 -mvenv $@; \ source $@/bin/activate; \ - python -m ensurepip --upgrade; \ - pip install --upgrade pip setuptools + python3 -m ensurepip --upgrade; \ + pip install --upgrade pip setuptools wheel #=> develop: install package in develop mode .PHONY: develop develop: - pip install -e .[dev] + pip install -e ".[dev]" #=> install: install package -#=> bdist bdist_egg bdist_wheel build sdist: distribution options -.PHONY: bdist bdist_egg bdist_wheel build build_sphinx sdist install -bdist bdist_egg bdist_wheel build sdist install: %: - python setup.py $@ +.PHONY: install +install: + pip install . +#=> build: make sdist and wheel +.PHONY: build +build: %: + python -m build ############################################################################ #= TESTING # see test configuration in setup.cfg +#=> cqa: execute code quality tests +cqa: + flake8 src --count --select=E9,F63,F7,F82 --show-source --statistics + isort --profile black --check src + ruff format --check src + bandit -ll -r src + #=> test: execute tests #=> test-code: test code (including embedded doctests) #=> test-docs: test example code in docs @@ -69,13 +77,13 @@ bdist bdist_egg bdist_wheel build sdist install: %: # => extra fx issues mapping models normalization parametrize pnd quick regression validation .PHONY: test test-code test-docs test: - python setup.py pytest -test-code: - python setup.py pytest --addopts="${TEST_DIRS}" + pytest test-docs: - python setup.py pytest --addopts="${DOC_TESTS}" + pytest docs +test-code: + pytest src test-%: - python setup.py pytest --addopts="-m '$*' ${TEST_DIRS}" + pytest -m '$*' src #=> tox -- run all tox tests tox: @@ -85,14 +93,18 @@ tox: ############################################################################ #= UTILITY TARGETS -# N.B. 
Although code is stored in github, I use hg and hg-git on the command line -#=> reformat: reformat code with yapf and commit +#=> reformat: reformat code with ruff/isort and commit .PHONY: reformat reformat: - @if hg sum | grep -qL '^commit:.*modified'; then echo "Repository not clean" 1>&2; exit 1; fi - @if hg sum | grep -qL ' applied'; then echo "Repository has applied patches" 1>&2; exit 1; fi - yapf -i -r "${PKGD}" tests - hg commit -m "reformatted with yapf" + @if ! git diff --cached --exit-code >/dev/null; then echo "Repository not clean" 1>&2; exit 1; fi + ruff format src tests + isort src tests + git commit -a -m "reformatted with ruff and isort" + +#=> rename: rename files and substitute content for new repo name +.PHONY: rename +rename: + ./sbin/rename-package #=> docs -- make sphinx docs .PHONY: docs @@ -100,36 +112,51 @@ docs: develop # RTD makes json. Build here to ensure that it works. make -C doc html json +#=> docker-image: build docker image +docker-image: + docker build -t biocommons/seqrepo-rest-service . + TAG=$$(git describe --tags | head -1); if [ -n "$$TAG" ]; then \ + docker tag biocommons/seqrepo-rest-service:latest biocommons/seqrepo-rest-service:$$TAG; \ + echo "Created biocommons/seqrepo-rest-service:$$TAG"; \ + fi + ############################################################################ #= CLEANUP #=> clean: remove temporary and backup files .PHONY: clean clean: - find . \( -name \*~ -o -name \*.bak \) -print0 | xargs -0r rm + rm -frv **/*~ **/*.bak #=> cleaner: remove files and directories that are easily rebuilt .PHONY: cleaner cleaner: clean - rm -fr .cache *.egg-info build dist doc/_build htmlcov - find . \( -name \*.pyc -o -name \*.orig -o -name \*.rej \) -print0 | xargs -0r rm - find . 
-name __pycache__ -print0 | xargs -0r rm -fr + rm -frv .cache build dist docs/_build + rm -frv **/__pycache__ + rm -frv **/*.egg-info + rm -frv **/*.pyc + rm -frv **/*.orig + rm -frv **/*.rej #=> cleanest: remove files and directories that require more time/network fetches to rebuild .PHONY: cleanest cleanest: cleaner - rm -fr .eggs .tox venv + rm -frv .eggs .tox venv +#=> distclean: remove untracked files and other detritus +.PHONY: distclean +distclean: cleanest + git clean -df ## -## Copyright 2016 Source Code Committers -## +## Copyright 2023 Source Code Committers +## ## Licensed under the Apache License, Version 2.0 (the "License"); ## you may not use this file except in compliance with the License. ## You may obtain a copy of the License at -## +## ## http://www.apache.org/licenses/LICENSE-2.0 -## +## ## Unless required by applicable law or agreed to in writing, software ## distributed under the License is distributed on an "AS IS" BASIS, ## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/Procfile b/Procfile index ab678e0..3ceba4b 100644 --- a/Procfile +++ b/Procfile @@ -1 +1 @@ -web: python3 app.py \ No newline at end of file +web: seqrepo-rest-service /usr/local/share/seqrepo/2024-02-20 \ No newline at end of file diff --git a/README.md b/README.md index aed10ff..e4a0686 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,31 @@ # seqrepo-rest-api -Provides an OpenAPI-based REST interface to biological sequences and -sequence metadata. - -Clients refer to sequences and metadata using familiar identifiers, -such as NM_000551.3 or GRCh38:1, or any of several hash-based -identifiers. The interface supports fast slicing of arbitrary regions -of large sequences. - -A "fully-qualified" identifier includes a namespace to disambiguate -accessions (e.g., "1" in GRCh37 and GRCh38). If the namespace is -provided, seqrepo uses it as-is. 
If the namespace is not provided and -the unqualified identifier refers to a unique sequence, it is -returned; otherwise, ambiguous identifiers will raise an error. - -SeqRepo favors identifiers from [identifiers.org](identifiers.org) -whenever available. Examples include +Provides SeqRepo and GA4GH RefGet REST interfaces to biological sequences and sequence metadata from an existing +[seqrepo](https://github.com/biocommons/biocommons.seqrepo/) sequence repository. + +## Description + +Specific, named biological sequences provide the reference and coordinate +sysstem for communicating variation and consequential phenotypic changes. +Several databases of sequences exist, with significant overlap, all using +distinct names. Furthermore, these systems are often difficult to install +locally. + +Clients refer to sequences and metadata using familiar identifiers, such as +NM_000551.3 or GRCh38:1, or any of several hash-based identifiers. The +interface supports fast slicing of arbitrary regions of large sequences. + +A "fully-qualified" identifier includes a namespace to disambiguate accessions +(e.g., "1" in GRCh37 and GRCh38). If the namespace is provided, seqrepo uses it +as-is. If the namespace is not provided and the unqualified identifier refers +to a unique sequence, it is returned; otherwise, ambiguous identifiers will +raise an error. + +SeqRepo favors identifiers from [identifiers.org](identifiers.org) whenever available. Examples include [refseq](https://registry.identifiers.org/registry/refseq) and [ensembl](https://registry.identifiers.org/registry/ensembl). -This repository is the REST interface only. The underlying data is -provided by +This repository is the REST interface only. The underlying data is provided by [seqrepo](https://github.com/biocommons/biocommons.seqrepo/). This repository also implements the [GA4GH refget (v1) @@ -29,16 +34,15 @@ protocol](https://samtools.github.io/hts-specs/refget.html) at Released under the Apache License, 2.0. 
-Links: -[Issues](https://github.com/biocommons/seqrepo-rest-service/issues) | +Links: [Issues](https://github.com/biocommons/seqrepo-rest-service/issues) | [Docker image](https://cloud.docker.com/u/biocommons/repository/docker/biocommons/seqrepo-rest-service) ## Citation -Hart RK, Prlić A (2020) -**SeqRepo: A system for managing local collections of biological sequences.** +Hart RK, Prlić A (2020) +**SeqRepo: A system for managing local collections of biological sequences.** PLoS ONE 15(12): e0239883. https://doi.org/10.1371/journal.pone.0239883 @@ -103,27 +107,57 @@ With range: ## Development $ make devready - $ source venv/3.8/bin/activate - + $ source venv/bin/activate ## Running a local instance Once installed as above, you should be able to: - $ SEQREPO_DIR=/usr/local/share/seqrepo/latest - python3 app.py + $ seqrepo-rest-service /usr/local/share/seqrepo/2024-02-20 The navigate to the URL shown in the console output. -## Running a docker image +## Building and running a docker image + +A docker image can be built with this repo or pulled from [docker +hub](https://hub.docker.com/r/biocommons/seqrepo-rest-service). In either case, the container requires an existing +local [seqrepo](https://github.com/biocommons/biocommons.seqrepo/) sequence repository. + +To build a docker image in this repo: + + make docker-image + +This will create biocommons/seqrepo-rest-service:latest, like this: + + $ docker images + REPOSITORY TAG IMAGE ID CREATED SIZE + biocommons/seqrepo-rest-service latest ad9ca051c5c9 2 minutes ago 627MB + +This docker image is periodically pushed to docker hub. -A docker image is available. It expects to have a local -[seqrepo](https://github.com/biocommons/biocommons.seqrepo/) instance -installed. 
Invoke like this: +Invoke the docker image like this: + - $ docker run \ + docker run \ --name seqrepo-rest-service \ --detach --rm -p 5000:5000 \ - -v /usr/local/share/seqrepo/:/usr/local/share/seqrepo/ \ - biocommons/seqrepo-rest-service + -v /usr/local/share/seqrepo/2024-02-20:/mnt/seqrepo \ + biocommons/seqrepo-rest-service \ + seqrepo-rest-service /mnt/seqrepo + +Where the command line options are as follows: +* `--name seqrepo-rest-service:` Assigns the name `seqrepo-rest-service` to the container +* `--detach:` Runs the container in background and prints the container ID +* `--rm:` Automatically removes the container when it exits +* `-p 5000:5000:` Publishes a container’s port(s), `5000:5000`, to the local host +* `-v /usr/local/share/seqrepo/2024-02-20:/mnt/seqrepo`: Binds the local volume, `/usr/local/share/seqrepo/2024-02-20` to the address `/mnt/seqrepo` within the container +* `biocommons/seqrepo-rest-service:` Specifies the docker image (as built above) +* `seqrepo-rest-service:` Specifies the console name or entry point `seqrepo_rest_service.cli:main` +* `/mnt/seqrepo:` Specifies the SeqRepo instance directory, as corresponding to the volume above + +You should then be able to fetch a test sequence like this: + + $ curl 'http://127.0.0.1:5000/seqrepo/1/sequence/refseq:NM_000551.3?end=20' + CCTCGCCTCCGTTACAACGG + +If things aren't working, check the logs with `docker logs -f seqrepo-rest-service`. 
diff --git a/app.py b/app.py deleted file mode 100644 index 230eff9..0000000 --- a/app.py +++ /dev/null @@ -1,59 +0,0 @@ -"""start refget webservice - -""" - -from pathlib import Path -import logging -import os - -from pkg_resources import get_distribution, resource_filename - -import coloredlogs -import connexion -from flask import Flask, redirect - - -_logger = logging.getLogger(__name__) - -def main(): - coloredlogs.install(level="INFO") - - if "SEQREPO_DIR" not in os.environ: - _logger.warn("SEQREPO_DIR is undefined; rest service will use `latest`") - - cxapp = connexion.App(__name__, debug=True) - cxapp.app.url_map.strict_slashes = False - - spec_files = [] - - # seqrepo interface - spec_fn = f"seqrepo_rest_service/seqrepo/openapi.yaml" - cxapp.add_api(spec_fn, - validate_responses=True, - strict_validation=True) - spec_files += [spec_fn] - - @cxapp.route('/') - @cxapp.route('/seqrepo') - def seqrepo_ui(): - return redirect("/seqrepo/1/ui/") - - - # refget interface - spec_fn = f"seqrepo_rest_service/refget/refget-openapi.yaml" - cxapp.add_api(spec_fn, - validate_responses=True, - strict_validation=True) - spec_files += [spec_fn] - - @cxapp.route('/refget') - def refget_ui(): - return redirect("/refget/1/ui/") - - - _logger.info("Also watching " + str(spec_files)) - cxapp.run(host="0.0.0.0", - extra_files=spec_files) - -if __name__ == "__main__": - main() diff --git a/codebuild/deploy_eb_env.py b/codebuild/deploy_eb_env.py index ffab1c0..0eb3dfb 100644 --- a/codebuild/deploy_eb_env.py +++ b/codebuild/deploy_eb_env.py @@ -1,50 +1,37 @@ """Module for deploying MetaKB EB environment.""" + import boto3 import time -elasticbeanstalk = boto3.client('elasticbeanstalk') -servicecatalog = boto3.client('servicecatalog') + +elasticbeanstalk = boto3.client("elasticbeanstalk") +servicecatalog = boto3.client("servicecatalog") terminate_time = 12 eb_app_name = "Seqrepo" eb_env_name = "Seqrepo-staging-env" sc_product_id = "prod-qg2twc66t544g" -print(f'Launching new Service 
Catalog Product for staging environment: ' - f'{eb_app_name}') -sc_product_artifacts =\ - servicecatalog.list_provisioning_artifacts(ProductId=sc_product_id) -for artifact in sc_product_artifacts['ProvisioningArtifactDetails']: - if artifact['Active']: - provisioning_artifact_id = artifact['Id'] +print(f"Launching new Service Catalog Product for staging environment: " f"{eb_app_name}") +sc_product_artifacts = servicecatalog.list_provisioning_artifacts(ProductId=sc_product_id) +for artifact in sc_product_artifacts["ProvisioningArtifactDetails"]: + if artifact["Active"]: + provisioning_artifact_id = artifact["Id"] try: eb_provisioned_product = servicecatalog.provision_product( ProductId=sc_product_id, ProvisioningArtifactId=provisioning_artifact_id, ProvisionedProductName=eb_env_name, ProvisioningParameters=[ - { - 'Key': 'Env', - 'Value': eb_app_name - }, - { - 'Key': 'EnvType', - 'Value': 'staging' - }, - { - 'Key': 'TerminateTime', - 'Value': str(terminate_time) - } - ]) - eb_provisioned_product_Id = \ - eb_provisioned_product['RecordDetail']['ProvisionedProductId'] - product_status = servicecatalog.describe_provisioned_product( - Id=eb_provisioned_product_Id) - eb_provisioned_product_status =\ - product_status['ProvisionedProductDetail']['Status'] + {"Key": "Env", "Value": eb_app_name}, + {"Key": "EnvType", "Value": "staging"}, + {"Key": "TerminateTime", "Value": str(terminate_time)}, + ], + ) + eb_provisioned_product_Id = eb_provisioned_product["RecordDetail"]["ProvisionedProductId"] + product_status = servicecatalog.describe_provisioned_product(Id=eb_provisioned_product_Id) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"]["Status"] while eb_provisioned_product_status == "UNDER_CHANGE": time.sleep(10) - product_status = servicecatalog.describe_provisioned_product( - Id=eb_provisioned_product_Id) - eb_provisioned_product_status = \ - product_status['ProvisionedProductDetail']['Status'] + product_status = 
servicecatalog.describe_provisioned_product(Id=eb_provisioned_product_Id) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"]["Status"] print(eb_provisioned_product_status) except Exception as e: - print(str(e)) \ No newline at end of file + print(str(e)) diff --git a/codebuild/deploy_eb_env_dev.py b/codebuild/deploy_eb_env_dev.py index b387dda..e71306e 100644 --- a/codebuild/deploy_eb_env_dev.py +++ b/codebuild/deploy_eb_env_dev.py @@ -1,50 +1,37 @@ """Module for deploying MetaKB EB environment.""" + import boto3 import time -elasticbeanstalk = boto3.client('elasticbeanstalk') -servicecatalog = boto3.client('servicecatalog') + +elasticbeanstalk = boto3.client("elasticbeanstalk") +servicecatalog = boto3.client("servicecatalog") terminate_time = 12 eb_app_name = "Seqrepo" eb_env_name = "Seqrepo-dev-env" sc_product_id = "prod-qg2twc66t544g" -print(f'Launching new Service Catalog Product for staging environment: ' - f'{eb_app_name}') -sc_product_artifacts =\ - servicecatalog.list_provisioning_artifacts(ProductId=sc_product_id) -for artifact in sc_product_artifacts['ProvisioningArtifactDetails']: - if artifact['Active']: - provisioning_artifact_id = artifact['Id'] +print(f"Launching new Service Catalog Product for staging environment: " f"{eb_app_name}") +sc_product_artifacts = servicecatalog.list_provisioning_artifacts(ProductId=sc_product_id) +for artifact in sc_product_artifacts["ProvisioningArtifactDetails"]: + if artifact["Active"]: + provisioning_artifact_id = artifact["Id"] try: eb_provisioned_product = servicecatalog.provision_product( ProductId=sc_product_id, ProvisioningArtifactId=provisioning_artifact_id, ProvisionedProductName=eb_env_name, ProvisioningParameters=[ - { - 'Key': 'Env', - 'Value': eb_app_name - }, - { - 'Key': 'EnvType', - 'Value': 'dev' - }, - { - 'Key': 'TerminateTime', - 'Value': str(terminate_time) - } - ]) - eb_provisioned_product_Id = \ - eb_provisioned_product['RecordDetail']['ProvisionedProductId'] - 
product_status = servicecatalog.describe_provisioned_product( - Id=eb_provisioned_product_Id) - eb_provisioned_product_status =\ - product_status['ProvisionedProductDetail']['Status'] + {"Key": "Env", "Value": eb_app_name}, + {"Key": "EnvType", "Value": "dev"}, + {"Key": "TerminateTime", "Value": str(terminate_time)}, + ], + ) + eb_provisioned_product_Id = eb_provisioned_product["RecordDetail"]["ProvisionedProductId"] + product_status = servicecatalog.describe_provisioned_product(Id=eb_provisioned_product_Id) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"]["Status"] while eb_provisioned_product_status == "UNDER_CHANGE": time.sleep(10) - product_status = servicecatalog.describe_provisioned_product( - Id=eb_provisioned_product_Id) - eb_provisioned_product_status = \ - product_status['ProvisionedProductDetail']['Status'] + product_status = servicecatalog.describe_provisioned_product(Id=eb_provisioned_product_Id) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"]["Status"] print(eb_provisioned_product_status) except Exception as e: print(str(e)) diff --git a/codebuild/terminate_eb_env.py b/codebuild/terminate_eb_env.py index 908de85..9531dec 100644 --- a/codebuild/terminate_eb_env.py +++ b/codebuild/terminate_eb_env.py @@ -1,29 +1,24 @@ """Module for terminating MetaKB EB environment.""" + import boto3 import json import time -client = boto3.client('lambda') -servicecatalog = boto3.client('servicecatalog') + +client = boto3.client("lambda") +servicecatalog = boto3.client("servicecatalog") eb_env_name = "Seqrepo-staging-env" data = {"sc_provisioned_name": eb_env_name} -client.invoke(FunctionName='igm-inf-terminate-provisioned-product', - Payload=json.dumps(data)) +client.invoke(FunctionName="igm-inf-terminate-provisioned-product", Payload=json.dumps(data)) time.sleep(10) -provisioned_product =\ - servicecatalog.describe_provisioned_product(Name=eb_env_name) -eb_provisioned_product_Id = \ - 
provisioned_product['ProvisionedProductDetail']['Id'] -product_status = servicecatalog.describe_provisioned_product( - Id=eb_provisioned_product_Id) -eb_provisioned_product_status = \ - product_status['ProvisionedProductDetail']['Status'] +provisioned_product = servicecatalog.describe_provisioned_product(Name=eb_env_name) +eb_provisioned_product_Id = provisioned_product["ProvisionedProductDetail"]["Id"] +product_status = servicecatalog.describe_provisioned_product(Id=eb_provisioned_product_Id) +eb_provisioned_product_status = product_status["ProvisionedProductDetail"]["Status"] while eb_provisioned_product_status == "UNDER_CHANGE": time.sleep(10) try: - product_status = servicecatalog.describe_provisioned_product( - Id=eb_provisioned_product_Id) - eb_provisioned_product_status = \ - product_status['ProvisionedProductDetail']['Status'] + product_status = servicecatalog.describe_provisioned_product(Id=eb_provisioned_product_Id) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"]["Status"] except: # noqa: E722 eb_provisioned_product_status = "PRODUCT NOT FOUND" - print(eb_provisioned_product_status) \ No newline at end of file + print(eb_provisioned_product_status) diff --git a/codebuild/terminate_eb_env_dev.py b/codebuild/terminate_eb_env_dev.py index 5a4fab3..f279213 100644 --- a/codebuild/terminate_eb_env_dev.py +++ b/codebuild/terminate_eb_env_dev.py @@ -1,29 +1,24 @@ """Module for terminating MetaKB EB environment.""" + import boto3 import json import time -client = boto3.client('lambda') -servicecatalog = boto3.client('servicecatalog') + +client = boto3.client("lambda") +servicecatalog = boto3.client("servicecatalog") eb_env_name = "Seqrepo-dev-env" data = {"sc_provisioned_name": eb_env_name} -client.invoke(FunctionName='igm-inf-terminate-provisioned-product', - Payload=json.dumps(data)) +client.invoke(FunctionName="igm-inf-terminate-provisioned-product", Payload=json.dumps(data)) time.sleep(10) -provisioned_product =\ - 
servicecatalog.describe_provisioned_product(Name=eb_env_name) -eb_provisioned_product_Id = \ - provisioned_product['ProvisionedProductDetail']['Id'] -product_status = servicecatalog.describe_provisioned_product( - Id=eb_provisioned_product_Id) -eb_provisioned_product_status = \ - product_status['ProvisionedProductDetail']['Status'] +provisioned_product = servicecatalog.describe_provisioned_product(Name=eb_env_name) +eb_provisioned_product_Id = provisioned_product["ProvisionedProductDetail"]["Id"] +product_status = servicecatalog.describe_provisioned_product(Id=eb_provisioned_product_Id) +eb_provisioned_product_status = product_status["ProvisionedProductDetail"]["Status"] while eb_provisioned_product_status == "UNDER_CHANGE": time.sleep(10) try: - product_status = servicecatalog.describe_provisioned_product( - Id=eb_provisioned_product_Id) - eb_provisioned_product_status = \ - product_status['ProvisionedProductDetail']['Status'] + product_status = servicecatalog.describe_provisioned_product(Id=eb_provisioned_product_Id) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"]["Status"] except: # noqa: E722 eb_provisioned_product_status = "PRODUCT NOT FOUND" - print(eb_provisioned_product_status) \ No newline at end of file + print(eb_provisioned_product_status) diff --git a/misc/load-test.py b/misc/load-test.py new file mode 100644 index 0000000..c42af25 --- /dev/null +++ b/misc/load-test.py @@ -0,0 +1,220 @@ +#!/usr/bin/env python3 + +""" +SeqRepo REST Service load testing. Used for validating concurrency / file descriptor bug fixes. + +Also useful for testing response performance of the seqrepo-rest-service program. Prints the +average request completion rate (requests/second) at the end. + +Uses multiprocessing.Process to create parallel workers (count=`-n`) sending requests to a +SeqRepo REST Service endpoint (`-u`). Sends `-m` requests with different values. 
Uses `-s` local +seqrepo directory to get viable input values from and monitors the number of open files using lsof. + +If running the seqrepo rest service through Docker, the lsof monitoring will only work if the seqrepo +directory the REST service uses is mounted as a local volume in the `docker run`. It cannot be on +a persistent docker volume or copied in at runtime because lsof will not see the open files. + +Example docker run for server, where a local seqrepo directory exists at /usr/local/share/seqrepo/latest: +``` +docker run -it --rm \ + -p 5000:5000 \ + -v /usr/local/share/seqrepo/latest:/seqrepo/latest \ + biocommons/seqrepo-rest-service:0.2.2 \ + seqrepo-rest-service /seqrepo/latest +``` + +Example command (20 worker processes, 500 requests, monitoring /usr/local/share/seqrepo/latest): +``` +python load-test.py -n 20 -s /usr/local/share/seqrepo/latest -m 500 -u 'http://localhost:5000/seqrepo' +``` + +A successful run will exit successfully, with no exceptions in the load-test.py process or in +the seqrepo rest service process. And the open file count logged by load-test.py will not increase +continuously but rather stabilize at a relatively low level on the order of tens of files. 
+""" + +import argparse +import pathlib +import random +import subprocess +import logging +import time +import sys +import queue +import multiprocessing # as multiprocessing +from typing import TextIO + +from biocommons.seqrepo import SeqRepo +from biocommons.seqrepo.dataproxy import SeqRepoRESTDataProxy + +_logger = logging.getLogger() + + +def log(log_queue: multiprocessing.Queue, line: str): + log_queue.put(line + "\n") + + +def lsof_count(dirname: str) -> int: + lsof_cmd = ["bash", "-c", f"lsof +D {dirname} | wc -l"] + lsof_p = subprocess.Popen(lsof_cmd, stdout=subprocess.PIPE) + (stdout, _) = lsof_p.communicate() + stdout = stdout.decode("utf-8").strip() + return int(stdout) + + +class LsofWorker(multiprocessing.Process): + def __init__(self, dirname, check_interval=5): + """ + check_interval: seconds between open file checks + """ + self.dirname = dirname + self.check_interval = check_interval + super().__init__() + + def run(self): + try: + while True: + ct = lsof_count(self.dirname) + print(f"{self.dirname} open file count {ct}", flush=True) + time.sleep(self.check_interval) + except InterruptedError: + pass + + +class MPWorker(multiprocessing.Process): + close_sentinel_value = -1 + + def __init__(self, q: multiprocessing.Queue, seqrepo_uri: str): + self.q = q + self.seqrepo_uri = seqrepo_uri + self.seqrepo_dataproxy = SeqRepoRESTDataProxy(seqrepo_uri) + self.n = 0 + self.query_bound_start = 0 + self.query_bound_end = 5 + super().__init__() + + def run(self): + while True: + try: + ac = self.q.get(False) + if ac == MPWorker.close_sentinel_value: + print(f"{self}: Done; processed {self.n} accessions", flush=True) + break + self.seqrepo_dataproxy.get_sequence( + ac, self.query_bound_start, self.query_bound_end + ) + self.n += 1 + except queue.Empty: + pass + + +def queue_filler_target(q, acs, n_workers): + """ + Callable target for queue filler. Necessary because multiprocess.Queue + uses pipes with a buffer limit that is relatively low. 
Background process + ensures queue keeps getting rest of input values, plus close sentinels. + """ + for ac in acs: + q.put(ac) + for _ in range(n_workers): + q.put(MPWorker.close_sentinel_value) + print("Done filling input queue", flush=True) + + +class StdOutPipeWorker(multiprocessing.Process): + """ + Used for synchronized logging between main and sub processes + """ + + def __init__(self, stdout_queue: multiprocessing.Queue, ostream: TextIO = None): + self.stdout_queue = stdout_queue + self.ostream = ostream if ostream else sys.stdout + self.stopped = False + super().__init__() + + def run(self): + while not self.stopped: + try: + val = self.stdout_queue.get(timeout=0.5) + print(val, file=self.ostream, end="") + except queue.Empty: + pass + + def stop(self): + self.stopped = True + + +def parse_args(argv): + ap = argparse.ArgumentParser(description=__doc__) + ap.add_argument("-n", "--num-workers", type=int, default=1) + ap.add_argument( + "-s", + "--seqrepo-path", + type=pathlib.Path, + required=True, + help="Local SeqRepo instance to get input values from, and to monitor open file count in", + ) + ap.add_argument("-u", "--seqrepo-rest-uri", type=str, default="http://localhost:5000/seqrepo") + ap.add_argument("-m", "--max-accessions", type=int, required=True) + ap.add_argument("-f", "--fd-cache-size", type=int, default=0) + opts = ap.parse_args(argv) + return opts + + +def main(argv): + opts = parse_args(argv) + + sr = SeqRepo(root_dir=opts.seqrepo_path, fd_cache_size=opts.fd_cache_size) + + acs = set(a["alias"] for a in sr.aliases.find_aliases(namespace="RefSeq", alias="NM_%")) + acs = random.sample(sorted(acs), opts.max_accessions or len(acs)) + + input_queue = multiprocessing.Queue() + + # log_queue = multiprocessing.Queue(maxsize=10000) + # log_worker = StdOutPipeWorker(log_queue, sys.stdout) + # log_worker.start() + + t_filler = multiprocessing.Process( + target=queue_filler_target, args=(input_queue, acs, opts.num_workers) + ) + t_filler.start() + + 
workers = [] + for _ in range(opts.num_workers): + workers.append(MPWorker(input_queue, opts.seqrepo_rest_uri)) + + lsof_p = None + print("Starting lsof process") + lsof_p = LsofWorker(opts.seqrepo_path, 1) + lsof_p.start() + + # Sleep briefly to let input queue get ahead + time.sleep(1) + print("Finished initialization") + + time_start = time.time() + print("Starting workers") + for w in workers: + w.start() + + for w in workers: + w.join() + + time_end = time.time() + time_diff = time_end - time_start + + if lsof_p: + lsof_p.terminate() + + print(f"Retrieved {len(acs)} seq in {time_diff} seconds ({len(acs)/time_diff} seq/sec)") + + # log_worker.stop() + # log_worker.join() + + +if __name__ == "__main__": + import coloredlogs + + coloredlogs.install(level="INFO") + main(argv=sys.argv[1:]) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..dc3c7e5 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,132 @@ +[project] +name = "seqrepo-rest-service" +authors = [ + { name = "biocommons contributors", email = "biocommons-dev@googlegroups.com" }, +] +description = "SeqRepo REST Service" +readme = "README.md" +license = { file="LICENSE.txt" } +requires-python = ">=3.9" +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", +] +dynamic = ["version"] +dependencies = [ + "biocommons.seqrepo ~= 0.6", + "coloredlogs", + "connexion[swagger-ui] ~= 2.2", + "Flask ~= 2.2", +] + +[project.optional-dependencies] +dev = [ + "bandit ~= 1.7", + "build ~= 0.8", + "flake8 ~= 4.0", + "ipython ~= 8.4", + "isort ~= 5.10", + "mypy-extensions ~= 1.0", + "pre-commit ~= 3.4", + "pylint ~= 2.14", + "pytest-cov ~= 4.1", + "pytest-optional-tests", + "pytest ~= 7.1", + "pyright~=1.1", + "ruff == 0.4.4", + "tox ~= 3.25", + "vcrpy", +] +docs = 
["mkdocs"] + +[project.scripts] +seqrepo-rest-service = "seqrepo_rest_service.cli:main" + +[project.urls] +"Homepage" = "https://github.com/biocommons/seqrepo-rest-service" +"Bug Tracker" = "https://github.com/biocommons/seqrepo-rest-service/issues" + +[build-system] +requires = ["setuptools ~= 69.0", "setuptools_scm[toml] ~= 8.0"] +build-backend = "setuptools.build_meta" + + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.packages.find] +where = ["src"] +exclude = ["__pycache__", "*.pyc"] +namespaces = true + +[tool.setuptools.package-data] +"*" = ["*.gz", "*.json", "*.yaml"] + + +[tool.setuptools_scm] + + +# https://docs.pytest.org/en/6.2.x/customize.html#pyproject-toml +[tool.pytest.ini_options] +addopts = "-s -v -x --strict-markers -m 'not extra' --doctest-modules --cov=src" +doctest_optionflags = [ + "ALLOW_BYTES", + "ALLOW_UNICODE", + "ELLIPSIS", + "IGNORE_EXCEPTION_DETAIL", + "NORMALIZE_WHITESPACE", +] +markers = [ + "network: tests that require network connectivity", + "slow: slow tests that should be run infrequently", + "vcr: tests with cached data", +] + + +[tool.coverage.run] +branch = true +source = ["biocommons.example"] +omit = ["*_test.py", "*/test/*", "*/tests/*"] + + +[tool.coverage.report] +show_missing = true +exclude_lines = [ + # Have to re-enable the standard pragma + "pragma: no cover", + + # Don't complain about missing debug-only code: + "def __repr__", + "if self.debug", + + # Don't complain if tests don't hit defensive assertion code: + "raise AssertionError", + "raise NotImplementedError", + + # Don't complain if non-runnable code isn't run: + "if __name__ == .__main__.:", +] + +[tool.isort] +profile = "black" +src_paths = ["src", "tests"] + +[tool.pyright] +include = ["src", "tests"] + + +[tool.pylint.'MESSAGES CONTROL'] +disable = "R0913" + + +[tool.pylint.format] +max-line-length = 100 + +[tool.ruff] +src = ["src", "tests"] +line-length = 100 + diff --git a/pytest.ini b/pytest.ini index b477ddc..b382eba 100644 --- 
a/pytest.ini +++ b/pytest.ini @@ -9,7 +9,7 @@ doctest_optionflags = ALLOW_UNICODE ALLOW_BYTES norecursedirs = - *.egg-info .eggs build cover _data dist etc examples misc prof sbin tmp venv + *.egg-info .eggs build cover _data dist etc examples misc prof sbin tmp venv codebuild # show warnings filterwarnings = diff --git a/sbin/makefile-extract-documentation b/sbin/makefile-extract-documentation new file mode 100755 index 0000000..29b77c0 --- /dev/null +++ b/sbin/makefile-extract-documentation @@ -0,0 +1,29 @@ +#!/usr/bin/env perl +# makefile-extract-documentation -- extract doc from a makefile + +# This script prints the header, up to the first empty line AND prints +# line starting with #=, and indented by an optional number of >. + +my $print = 1; +while (<>) { + next if ($. == 1 and m/^\#!/); + print if ($print or s/^\#=([>]*)\s+/' ' x length($1)/e); + $print = 0 if m/^$/; +} + + +# +# Copyright 2021 Reece Hart (reecehart@gmail.com) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# diff --git a/sbin/rename-package b/sbin/rename-package new file mode 100755 index 0000000..2d02d36 --- /dev/null +++ b/sbin/rename-package @@ -0,0 +1,50 @@ +#!/bin/bash +# renames this Python package + + +new_name="$1" +if [ -z "$new_name" ]; then + # This expression is intended to work for urls like https://github.com/biocommons/biocommons.example.git + # (preferred for Python packages) and https://github.com/biocommons/example.git + new_name=$(expr "$(git remote get-url origin)" : '.*biocommons.\(.*\).git') +fi +echo "Renaming to $new_name" + +if ! git diff --cached --exit-code >/dev/null; then + echo "Repository not clean" 1>&2; + exit 1; +fi + +# Substitute new name for example in relevant files +xargs perl -i.bak -p0e "s/(biocommons.)example/\1$new_name/g" <= 3.5 -zip_safe = false -install_requires = - biocommons.seqrepo>=0.6.2 - coloredlogs - connexion[swagger-ui] - Jinja2<3.0 - MarkupSafe==2.0.1 - -setup_requires = - pytest-runner - setuptools_scm - wheel - -tests_require = - pytest - pytest-cov - vcrpy - - -[options.entry_points] -console_scripts = - seqrepo-rest-service = seqrepo_rest_service.__main__:main - - -[options.extras_require] -dev = - ipython - tox - - -# TODO: Needed? 
-[options.packages.find] -exclude = - __pycache__ - *.pyc - - -[options.package_data] -* = - *.yaml - - - - -[aliases] -test = pytest - -[bdist_wheel] -universal = 1 - -[build_sphinx] -all_files = 1 - -# http://pep8.readthedocs.org/en/latest/intro.html#error-codes -[flake8] -max-line-length = 120 -exclude = tests/* -max-complexity = 10 -ignore = E129,E221,E241,E251,E303,W291 - -[tool:pytest] -addopts = --cov-config=setup.cfg --cov-report term-missing --doctest-modules --doctest-glob='*.rst' --strict -collect_ignore = setup.py -doctest_optionflags = ALLOW_UNICODE ALLOW_BYTES ELLIPSIS IGNORE_EXCEPTION_DETAIL NORMALIZE_WHITESPACE -norecursedirs = _data -testpaths = tests -markers = - network - diff --git a/setup.py b/setup.py deleted file mode 100644 index 460aabe..0000000 --- a/setup.py +++ /dev/null @@ -1,2 +0,0 @@ -from setuptools import setup -setup(use_scm_version=True) diff --git a/src/seqrepo_rest_service/__init__.py b/src/seqrepo_rest_service/__init__.py new file mode 100644 index 0000000..01924fc --- /dev/null +++ b/src/seqrepo_rest_service/__init__.py @@ -0,0 +1,7 @@ +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version(__package__) +except PackageNotFoundError: # pragma: no cover + # package is not installed + __version__ = None diff --git a/src/seqrepo_rest_service/cli.py b/src/seqrepo_rest_service/cli.py new file mode 100644 index 0000000..313d987 --- /dev/null +++ b/src/seqrepo_rest_service/cli.py @@ -0,0 +1,86 @@ +"""start seqrepo rest service""" + +import argparse +import importlib.resources +import logging +import os +import pathlib +import time + +import coloredlogs +import connexion +from biocommons.seqrepo import SeqRepo +from flask import redirect + +WAIT_POLL_PERIOD = 15 # seconds between polling for SEQREPO PATH + + +_logger = logging.getLogger(__name__) +resources = importlib.resources.files(__package__) + + +def _parse_opts(): + ap = argparse.ArgumentParser(description=__doc__.split()[0]) + 
ap.add_argument( + "SEQREPO_INSTANCE_DIR", + type=pathlib.Path, + help="SeqRepo instance directory (e.g., /usr/local/share/seqrepo/2024-02-20)", + ) + ap.add_argument( + "--wait-for-path", + "-w", + action="store_true", + default=False, + help="Wait for path to exist before starting (useful for docker-compose)", + ) + opts = ap.parse_args() + return opts + + +def main(): + coloredlogs.install(level="INFO") + + if "SEQREPO_DIR" in os.environ: + _logger.warn("SEQREPO_DIR environment variable is now ignored") + + opts = _parse_opts() + + seqrepo_dir = opts.SEQREPO_INSTANCE_DIR + if opts.wait_for_path: + while not seqrepo_dir.exists(): + _logger.info(f"{seqrepo_dir}: waiting for existence") + time.sleep(WAIT_POLL_PERIOD) + _logger.info(f"{seqrepo_dir}: path found") + _ = SeqRepo(seqrepo_dir.as_posix()) # test opening + + cxapp = connexion.App(__name__, debug=True) + cxapp.app.url_map.strict_slashes = False + cxapp.app.config["seqrepo_dir"] = seqrepo_dir + + spec_files = [] + + # seqrepo interface + spec_fn = resources / "seqrepo" / "openapi.yaml" + cxapp.add_api(str(spec_fn), validate_responses=True, strict_validation=True) + spec_files += [spec_fn] + + @cxapp.route("/") + @cxapp.route("/seqrepo") + def seqrepo_ui(): + return redirect("/seqrepo/1/ui/") + + # refget interface + spec_fn = resources / "refget" / "refget-openapi.yaml" + cxapp.add_api(str(spec_fn), validate_responses=True, strict_validation=True) + spec_files += [spec_fn] + + @cxapp.route("/refget") + def refget_ui(): + return redirect("/refget/1/ui/") + + _logger.info("Also watching " + str(spec_files)) + cxapp.run(host="0.0.0.0", extra_files=spec_files) + + +if __name__ == "__main__": + main() diff --git a/seqrepo_rest_service/refget/__init__.py b/src/seqrepo_rest_service/refget/__init__.py similarity index 100% rename from seqrepo_rest_service/refget/__init__.py rename to src/seqrepo_rest_service/refget/__init__.py diff --git a/seqrepo_rest_service/refget/refget-openapi.yaml 
b/src/seqrepo_rest_service/refget/refget-openapi.yaml similarity index 100% rename from seqrepo_rest_service/refget/refget-openapi.yaml rename to src/seqrepo_rest_service/refget/refget-openapi.yaml diff --git a/seqrepo_rest_service/refget/routes/__init__.py b/src/seqrepo_rest_service/refget/routes/__init__.py similarity index 100% rename from seqrepo_rest_service/refget/routes/__init__.py rename to src/seqrepo_rest_service/refget/routes/__init__.py diff --git a/seqrepo_rest_service/refget/routes/metadata.py b/src/seqrepo_rest_service/refget/routes/metadata.py similarity index 77% rename from seqrepo_rest_service/refget/routes/metadata.py rename to src/seqrepo_rest_service/refget/routes/metadata.py index deeca0a..e00f85d 100644 --- a/seqrepo_rest_service/refget/routes/metadata.py +++ b/src/seqrepo_rest_service/refget/routes/metadata.py @@ -3,7 +3,12 @@ from connexion import NoContent, request from seqrepo_rest_service.threadglobals import get_seqrepo -from seqrepo_rest_service.utils import get_sequence_id, base64url_to_hex, problem, valid_content_types +from seqrepo_rest_service.utils import ( + get_sequence_id, + base64url_to_hex, + problem, + valid_content_types, +) _logger = logging.getLogger(__name__) @@ -14,7 +19,7 @@ def get(id): if accept_header and accept_header not in valid_content_types: _logger.warn(f"{accept_header} not valid") return problem(406, "Invalid Accept header") - + sr = get_seqrepo() seq_id = get_sequence_id(sr, id) if not seq_id: @@ -30,10 +35,7 @@ def get(id): "md5": md5_id, "trunc512": base64url_to_hex(seq_id), "length": seqinfo["len"], - "aliases": [ - {"naming_authority": a["namespace"], "alias": a["alias"]} - for a in aliases - ] - } + "aliases": [{"naming_authority": a["namespace"], "alias": a["alias"]} for a in aliases], + } return {"metadata": md}, 200 diff --git a/seqrepo_rest_service/refget/routes/ping.py b/src/seqrepo_rest_service/refget/routes/ping.py similarity index 100% rename from seqrepo_rest_service/refget/routes/ping.py 
rename to src/seqrepo_rest_service/refget/routes/ping.py diff --git a/seqrepo_rest_service/refget/routes/sequence.py b/src/seqrepo_rest_service/refget/routes/sequence.py similarity index 90% rename from seqrepo_rest_service/refget/routes/sequence.py rename to src/seqrepo_rest_service/refget/routes/sequence.py index 93f8a8f..b4a5ba1 100644 --- a/seqrepo_rest_service/refget/routes/sequence.py +++ b/src/seqrepo_rest_service/refget/routes/sequence.py @@ -6,7 +6,6 @@ from seqrepo_rest_service.threadglobals import get_seqrepo from seqrepo_rest_service.utils import get_sequence_id, problem, valid_content_types - _logger = logging.getLogger(__name__) range_re = re.compile("^bytes=(\d+)-(\d+)$") @@ -16,7 +15,7 @@ def get(id, start=None, end=None): accept_header = request.headers.get("Accept", None) if accept_header and accept_header not in valid_content_types: return problem(406, "Invalid Accept header") - + range_header = request.headers.get("Range", None) if range_header: _logger.debug(f"Received header `Range: {range_header}`") @@ -28,14 +27,14 @@ def get(id, start=None, end=None): start, end = int(m.group(1)), int(m.group(2)) + 1 _logger.debug(f"Parsed `{range_header}` as ({start}, {end})") if start > end: - return problem(416, f"Range queries may specify start > end") - + return problem(416, "Range queries may specify start > end") + sr = get_seqrepo() seq_id = get_sequence_id(sr, id) if not seq_id: return NoContent, 404 seqinfo = sr.sequences.fetch_seqinfo(seq_id) - + if start is not None and end is not None: if start >= seqinfo["len"]: return problem(416, "Invalid coordinates: start > sequence length") @@ -45,11 +44,12 @@ def get(id, start=None, end=None): if start > end: return problem(501, "Invalid coordinates: start > end") if not (0 <= start <= end <= seqinfo["len"]) and not range_header: - return problem(416, "Invalid coordinates: must obey 0 <= start <= end <= sequence_length") + return problem( + 416, "Invalid coordinates: must obey 0 <= start <= end <= 
sequence_length" + ) try: status = 206 if ((start or end) and range_header) else 200 return sr.sequences.fetch(seq_id, start, end), status except KeyError: return NoContent, 404 - diff --git a/seqrepo_rest_service/refget/routes/serviceInfo.py b/src/seqrepo_rest_service/refget/routes/serviceInfo.py similarity index 85% rename from seqrepo_rest_service/refget/routes/serviceInfo.py rename to src/seqrepo_rest_service/refget/routes/serviceInfo.py index 0a636a7..52d0dec 100644 --- a/seqrepo_rest_service/refget/routes/serviceInfo.py +++ b/src/seqrepo_rest_service/refget/routes/serviceInfo.py @@ -1,19 +1,17 @@ from seqrepo_rest_service import __version__ from seqrepo_rest_service.threadglobals import get_seqrepo -import biocommons.seqrepo +import biocommons.seqrepo import bioutils -from pkg_resources import get_distribution - def get(): sr = get_seqrepo() - + return { "service": { "algorithms": ["md5", "trunc512"], - "circular_supported" : False, + "circular_supported": False, "subsequence_limit": None, "supported_api_versions": ["1.0"], }, @@ -21,7 +19,7 @@ def get(): "seqrepo-rest-service": { "version": __version__, "url": "https://github.com/biocommons/seqrepo-rest-service/", - }, + }, "seqrepo": { "version": biocommons.seqrepo.__version__, "root": sr._root_dir, @@ -31,5 +29,5 @@ def get(): "version": bioutils.__version__, "url": "https://github.com/biocommons/bioutils/", }, - } + }, } diff --git a/seqrepo_rest_service/seqrepo/__init__.py b/src/seqrepo_rest_service/seqrepo/__init__.py similarity index 100% rename from seqrepo_rest_service/seqrepo/__init__.py rename to src/seqrepo_rest_service/seqrepo/__init__.py diff --git a/seqrepo_rest_service/seqrepo/openapi.yaml b/src/seqrepo_rest_service/seqrepo/openapi.yaml similarity index 100% rename from seqrepo_rest_service/seqrepo/openapi.yaml rename to src/seqrepo_rest_service/seqrepo/openapi.yaml diff --git a/seqrepo_rest_service/seqrepo/routes/__init__.py b/src/seqrepo_rest_service/seqrepo/routes/__init__.py similarity 
index 100% rename from seqrepo_rest_service/seqrepo/routes/__init__.py rename to src/seqrepo_rest_service/seqrepo/routes/__init__.py diff --git a/seqrepo_rest_service/seqrepo/routes/metadata.py b/src/seqrepo_rest_service/seqrepo/routes/metadata.py similarity index 98% rename from seqrepo_rest_service/seqrepo/routes/metadata.py rename to src/seqrepo_rest_service/seqrepo/routes/metadata.py index c6d5ce0..a9fd496 100644 --- a/seqrepo_rest_service/seqrepo/routes/metadata.py +++ b/src/seqrepo_rest_service/seqrepo/routes/metadata.py @@ -5,7 +5,6 @@ from seqrepo_rest_service.threadglobals import get_seqrepo from seqrepo_rest_service.utils import get_sequence_ids, problem - _logger = logging.getLogger(__name__) @@ -27,6 +26,6 @@ def get(alias): "aliases": [f"{a['namespace']}:{a['alias']}" for a in aliases], "alphabet": seqinfo["alpha"], "length": seqinfo["len"], - } + } return md, 200 diff --git a/seqrepo_rest_service/seqrepo/routes/ping.py b/src/seqrepo_rest_service/seqrepo/routes/ping.py similarity index 65% rename from seqrepo_rest_service/seqrepo/routes/ping.py rename to src/seqrepo_rest_service/seqrepo/routes/ping.py index bb94be4..4ed7361 100644 --- a/seqrepo_rest_service/seqrepo/routes/ping.py +++ b/src/seqrepo_rest_service/seqrepo/routes/ping.py @@ -1,27 +1,25 @@ -from seqrepo_rest_service import __version__ -from seqrepo_rest_service.threadglobals import get_seqrepo - -import biocommons.seqrepo +import biocommons.seqrepo import bioutils +from importlib import metadata -from pkg_resources import get_distribution +from ...threadglobals import get_seqrepo def get(): sr = get_seqrepo() - + return { - "version": __version__, + "version": metadata.version("seqrepo-rest-service"), "url": "https://github.com/biocommons/seqrepo-rest-service/", "dependencies": { "seqrepo": { "version": biocommons.seqrepo.__version__, - "root": sr._root_dir, + "root": str(sr._root_dir), "url": "https://github.com/biocommons/biocommons.seqrepo/", }, "bioutils": { "version": 
bioutils.__version__, "url": "https://github.com/biocommons/bioutils/", }, - } + }, } diff --git a/seqrepo_rest_service/seqrepo/routes/sequence.py b/src/seqrepo_rest_service/seqrepo/routes/sequence.py similarity index 99% rename from seqrepo_rest_service/seqrepo/routes/sequence.py rename to src/seqrepo_rest_service/seqrepo/routes/sequence.py index 6b026a6..aa703d6 100644 --- a/seqrepo_rest_service/seqrepo/routes/sequence.py +++ b/src/seqrepo_rest_service/seqrepo/routes/sequence.py @@ -5,7 +5,6 @@ from seqrepo_rest_service.threadglobals import get_seqrepo from seqrepo_rest_service.utils import get_sequence_ids, problem - _logger = logging.getLogger(__name__) diff --git a/src/seqrepo_rest_service/threadglobals.py b/src/seqrepo_rest_service/threadglobals.py new file mode 100644 index 0000000..ead67c2 --- /dev/null +++ b/src/seqrepo_rest_service/threadglobals.py @@ -0,0 +1,24 @@ +"""per-thread globals for seqrepo REST APIs""" + +import logging + +from biocommons.seqrepo import SeqRepo +from flask import current_app + +_logger = logging.getLogger(__name__) + + +def get_seqrepo(): + seqrepo_dir = current_app.config["seqrepo_dir"] + if _get_or_create("seqrepo", None, False) is None: + _logger.info("Opening seqrepo_dir=%s", seqrepo_dir) + return _get_or_create("seqrepo", lambda: SeqRepo(root_dir=seqrepo_dir)) + + +def _get_or_create(k, f, create=True): + k = "_" + k + o = getattr(_get_or_create, k, None) + if o is None and create: + o = f() + setattr(_get_or_create, k, o) + return o diff --git a/seqrepo_rest_service/utils.py b/src/seqrepo_rest_service/utils.py similarity index 96% rename from seqrepo_rest_service/utils.py rename to src/seqrepo_rest_service/utils.py index 3e7e797..656170c 100644 --- a/seqrepo_rest_service/utils.py +++ b/src/seqrepo_rest_service/utils.py @@ -1,14 +1,12 @@ +import logging +import re from base64 import urlsafe_b64decode, urlsafe_b64encode from binascii import hexlify, unhexlify from http.client import responses as http_responses -import 
logging -import re import connexion - from bioutils.accessions import infer_namespaces - _logger = logging.getLogger(__name__) @@ -19,15 +17,17 @@ "application/json", "text/plain", "*/*", - ] +] def hex_to_base64url(s): return urlsafe_b64encode(unhexlify(s)).decode("ascii") + def base64url_to_hex(s): return hexlify(urlsafe_b64decode(s)).decode("ascii") + def get_sequence_id(sr, query): """determine sequence_ids after guessing form of query @@ -35,19 +35,19 @@ def get_sequence_id(sr, query): * A fully-qualified sequence alias (e.g., VMC:0123 or refseq:NM_01234.5) * A digest or digest prefix from VMC, TRUNC512, or MD5 * A sequence accession (without namespace) - + Returns None if not found; seq_id if only one match; raises - RuntimeError for ambiguous matches. + RuntimeError for ambiguous matches. """ - + seq_ids = get_sequence_ids(sr, query) if len(seq_ids) == 0: _logger.warning(f"No sequence found for {query}") return None if len(seq_ids) > 1: raise RuntimeError(f"Multiple distinct sequences found for {query}") - return seq_ids.pop() # exactly 1 id found + return seq_ids.pop() # exactly 1 id found def get_sequence_ids(sr, query): @@ -57,7 +57,7 @@ def get_sequence_ids(sr, query): * A fully-qualified sequence alias (e.g., VMC:0123 or refseq:NM_01234.5) * A digest or digest prefix from VMC, TRUNC512, or MD5 * A sequence accession (without namespace) - + The first match will be returned. 
""" @@ -74,10 +74,10 @@ def problem(status, message): return connexion.problem(status=status, title=http_responses[status], detail=message) - ############################################################################ # INTERNAL + def _generate_nsa_options(query): """ >>> _generate_nsa_options("NM_000551.3") @@ -103,7 +103,7 @@ def _generate_nsa_options(query): if namespaces: nsa_options = [(ns, query) for ns in namespaces] return nsa_options - + # if hex, try md5 and TRUNC512 if re.match(r"^(?:[0-9A-Fa-f]{8,})$", query): nsa_options = [("MD5", query + "%")] diff --git a/tox.ini b/tox.ini index 286e99d..e3ae529 100644 --- a/tox.ini +++ b/tox.ini @@ -1,7 +1,8 @@ [tox] -envlist = py35,py36,py37 +envlist = py39,py310 +isolated_build = True -[testenv] -deps=pytest -whitelist_externals=make -commands=make test +[gh] +python = + 3.9 = py39, type + 3.10 = py310