From 9d013b5ac67c14441b2fa8288b0af2ea53cf5cfe Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Sun, 13 Oct 2024 11:32:49 +0200 Subject: [PATCH] Initial commit --- .github/workflows/build.yml | 38 ++++++ .github/workflows/doc-changes.yml | 23 ++++ .gitignore | 110 +++++++++++++++ .pre-commit-config.yaml | 51 +++++++ LICENSE | 201 ++++++++++++++++++++++++++++ README.md | 5 + cellregistry/__init__.py | 34 +++++ cellregistry/migrations/__init__.py | 0 cellregistry/models.py | 72 ++++++++++ noxfile.py | 31 +++++ pyproject.toml | 135 +++++++++++++++++++ tests/test_integrity.py | 17 +++ 12 files changed, 717 insertions(+) create mode 100644 .github/workflows/build.yml create mode 100644 .github/workflows/doc-changes.yml create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 cellregistry/__init__.py create mode 100644 cellregistry/migrations/__init__.py create mode 100644 cellregistry/models.py create mode 100644 noxfile.py create mode 100644 pyproject.toml create mode 100644 tests/test_integrity.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..4688300 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,38 @@ +name: build + +on: + push: + branches: [main] + pull_request: + branches: [main, staging] + +jobs: + build: + runs-on: ubuntu-22.04 + env: + GITHUB_EVENT_NAME: ${{ github.event_name }} + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-latest + python: "3.10" + - os: ubuntu-latest + python: "3.10" + pip-flags: "--pre" + timeout-minutes: 15 + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + - uses: actions/cache@v3 + with: + path: ~/.cache/pre-commit + key: pre-commit-${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }} + - run: pip install "git+https://@github.com/laminlabs/laminci" + - run: 
nox -s lint + - run: nox -s build diff --git a/.github/workflows/doc-changes.yml b/.github/workflows/doc-changes.yml new file mode 100644 index 0000000..bfb3415 --- /dev/null +++ b/.github/workflows/doc-changes.yml @@ -0,0 +1,23 @@ +name: doc-changes + +on: + pull_request_target: + branches: + - main + types: + - closed + +jobs: + latest-changes: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - run: pip install "laminci[doc-changes]@git+https://x-access-token:${{ secrets.LAMIN_BUILD_DOCS }}@github.com/laminlabs/laminci" + - run: laminci doc-changes + env: + repo_token: ${{ secrets.GITHUB_TOKEN }} + docs_token: ${{ secrets.LAMIN_BUILD_DOCS }} + changelog_file: lamin-docs/docs/changelog/soon/usecases.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d6390f3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,110 @@ +# macOS +.DS_Store +.AppleDouble +.LSOverride + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# mypy +.mypy_cache/ + +# IDE settings +.vscode/ +.idea/ + +# Lamin +_build +docs/cellregistry.* +lamin_sphinx +docs/conf.py +_docs_tmp* +docs/test-cellregistry/ +test-cellregistry/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..e872396 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,51 @@ +fail_fast: false +default_language_version: + python: python3 +default_stages: + - commit + - push +minimum_pre_commit_version: 2.12.0 +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v4.0.0-alpha.4 + hooks: + - id: prettier + exclude: | + (?x)( + docs/changelog.md + ) + - repo: https://github.com/kynan/nbstripout + rev: 0.6.1 + hooks: + - id: nbstripout + exclude: | + (?x)( + docs/examples/| + docs/notes/ + ) + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.5.5 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes] + - id: ruff-format + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: detect-private-key + - id: check-ast + - id: end-of-file-fixer + exclude: | + (?x)( + .github/workflows/latest-changes.jinja2 + ) + - id: mixed-line-ending + args: [--fix=lf] + - id: trailing-whitespace + - id: check-case-conflict + - repo: https://github.com/pre-commit/mirrors-mypy + 
rev: v1.7.1 + hooks: + - id: mypy + args: [--no-strict-optional, --ignore-missing-imports] + additional_dependencies: ["types-requests", "types-attrs"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b09cd78 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..acebcd8 --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +# cellregistry: A registry for single cells + +This schema module provides a single registry `Cell` to store metadata for single cells. + +Read the [docs](https://docs.lamin.ai/cellregistry). diff --git a/cellregistry/__init__.py b/cellregistry/__init__.py new file mode 100644 index 0000000..b147350 --- /dev/null +++ b/cellregistry/__init__.py @@ -0,0 +1,34 @@ +"""A registry for single cells. + +This schema module provides a single registry `Cell` to store metadata for single cells. + +Install the package:: + + pip install cellregistry + +Import the package:: + + import cellregistry as cr + +The `Cell` registry: + +.. autosummary:: + :toctree: . 
+ + Cell +""" + +__version__ = "0.1.0" # denote a pre-release for 0.1.0 with 0.1rc1 + +from lamindb_setup import _check_instance_setup + + +def __getattr__(name): + if name != "models": + _check_instance_setup(from_module="cellregistry") + return globals()[name] + + +if _check_instance_setup(): + del __getattr__ # delete so that imports work out + from .models import Cell diff --git a/cellregistry/migrations/__init__.py b/cellregistry/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cellregistry/models.py b/cellregistry/models.py new file mode 100644 index 0000000..8d943a5 --- /dev/null +++ b/cellregistry/models.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +from django.db import models +from django.db.models import CASCADE, PROTECT +from lnschema_core import ids +from lnschema_core.models import ( + Artifact, + CanValidate, + Feature, + LinkORM, + Record, + TracksRun, + TracksUpdates, +) + + +class Cell(Record, CanValidate, TracksRun, TracksUpdates): + """Single cells. + + Example: + >>> cell = Cell( + ... name="A cell name", + ... doi="A doi", + ... 
).save() + """ + + class Meta(Record.Meta, TracksRun.Meta, TracksUpdates.Meta): + abstract = False + + id: int = models.AutoField(primary_key=True) + """Internal id, valid only in one DB instance.""" + uid: str = models.CharField(unique=True, max_length=12, default=ids.base62_12) + """Universal id, valid across DB instances.""" + name: str = models.CharField(max_length=255, default=None, db_index=True) + """Name of the cell.""" + abbr: str | None = models.CharField( + max_length=32, db_index=True, unique=True, null=True, default=None + ) + """A unique abbreviation.""" + url: str | None = models.URLField(max_length=255, null=True, default=None) + """A URL to view.""" + pubmed_id: int | None = models.BigIntegerField(null=True, default=None) + """A PubMed ID.""" + doi: str | None = models.CharField( + max_length=255, null=True, default=None, db_index=True + ) + """A DOI.""" + text: str | None = models.TextField(null=True, default=None) + """Text of the reference included in search, e.g. 
the abstract or the full-text.""" + artifacts: Artifact = models.ManyToManyField( + Artifact, through="ArtifactReference", related_name="references" + ) + """Artifacts labeled with this cell.""" + + +class ArtifactReference(Record, LinkORM, TracksRun): # link table between Artifact and Cell + id: int = models.BigAutoField(primary_key=True) + artifact: Artifact = models.ForeignKey( + Artifact, CASCADE, related_name="links_reference" + ) + reference: Cell = models.ForeignKey( # must target Cell; no Reference model exists in this module + Cell, PROTECT, related_name="links_artifact" + ) + feature: Feature = models.ForeignKey( + Feature, + PROTECT, + null=True, + default=None, + related_name="links_artifactreference", + ) + label_ref_is_name: bool | None = models.BooleanField(null=True, default=None) + feature_ref_is_name: bool | None = models.BooleanField(null=True, default=None) diff --git a/noxfile.py b/noxfile.py new file mode 100644 index 0000000..1da68d7 --- /dev/null +++ b/noxfile.py @@ -0,0 +1,31 @@ +import os + +import nox +from laminci.nox import ( + build_docs, + install_lamindb, + login_testuser1, + run, + run_pre_commit, + run_pytest, +) + +# we'd like to aggregate coverage information across sessions +# and for this the code needs to be located in the same +# directory in every github action runner +# this also allows to break out an installation section +nox.options.default_venv_backend = "none" +IS_PR = os.getenv("GITHUB_EVENT_NAME") != "push" + + +@nox.session +def lint(session: nox.Session) -> None: + run_pre_commit(session) + + +@nox.session +def build(session: nox.Session) -> None: + branch = "main" if IS_PR else "release" # point back to "release" + install_lamindb(session, branch=branch, extras="bionty,aws") + run(session, "uv pip install --system .[dev]") + run_pytest(session) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..3bbb6dd --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,135 @@ +[build-system] +requires = 
["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +[project] +name = "cellregistry" +authors 
= [{name = "Lamin Labs", email = "open-source@lamin.ai"}] +readme = "README.md" +dynamic = ["version", "description"] +dependencies = [ + "lamindb" +] + +[project.urls] +Home = "https://github.com/laminlabs/cellregistry" + +[project.optional-dependencies] +dev = [ + "pre-commit", + "nox", + "pytest>=6.0", + "pytest-cov", + "nbproject_test", +] + +[tool.pytest.ini_options] +testpaths = [ + "tests", +] + +[tool.ruff] +src = ["src"] +line-length = 88 +lint.select = [ + "F", # Errors detected by Pyflakes + "E", # Error detected by Pycodestyle + "W", # Warning detected by Pycodestyle + "I", # isort + "D", # pydocstyle + "B", # flake8-bugbear + "TID", # flake8-tidy-imports + "C4", # flake8-comprehensions + "BLE", # flake8-blind-except + "UP", # pyupgrade + "RUF100", # Report unused noqa directives + "TCH", # Typing imports + "NPY", # Numpy specific rules + "PTH", # Use pathlib + "S" # Security +] +lint.ignore = [ + # Do not catch blind exception: `Exception` + "BLE001", + # Errors from function calls in argument defaults. These are fine when the result is immutable. 
+ "B008", + # line too long -> we accept long comment lines; black gets rid of long code lines + "E501", + # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient + "E731", + # allow I, O, l as variable names -> I is the identity matrix + "E741", + # Missing docstring in public module + "D100", + # undocumented-public-class + "D101", + # Missing docstring in public method + "D102", + # Missing docstring in public function + "D103", + # Missing docstring in public package + "D104", + # __magic__ methods are often self-explanatory, allow missing docstrings + "D105", + # Missing docstring in public nested class + "D106", + # Missing docstring in __init__ + "D107", + ## Disable one in each pair of mutually incompatible rules + # We don’t want a blank line before a class docstring + "D203", + # 1 blank line required after class docstring + "D204", + # first line should end with a period [Bug: doesn't work with single-line docstrings] + # We want docstrings to start immediately after the opening triple quote + "D213", + # Section underline is over-indented ("{name}") + "D215", + # First line should be in imperative mood; try rephrasing + "D401", + # First word of the first line should be capitalized: {} -> {} + "D403", + # First word of the docstring should not be "This" + "D404", + # Section name should end with a newline ("{name}") + "D406", + # Missing dashed underline after section ("{name}") + "D407", + # Section underline should be in the line following the section's name ("{name}") + "D408", + # Section underline should match the length of its name ("{name}") + "D409", + # No blank lines allowed between a section header and its content ("{name}") + "D412", + # Missing blank line after last section ("{name}") + "D413", + # Missing argument description in the docstring + "D417", + # Imports unused + "F401", + # camelcase imported as lowercase + "N813", + # module import not at top level of file + "E402", + # `open()` should 
be replaced by `Path.open()` + "PTH123", + # `subprocess` call: check for execution of untrusted input - https://github.com/PyCQA/bandit/issues/333 + "S603", + # Starting a process with a partial executable path + "S607" +] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.per-file-ignores] +"docs/*" = ["I"] +"tests/**/*.py" = [ + "D", # docstrings are allowed to look a bit off + "S101", # asserts allowed in tests... + "ARG", # Unused function args -> fixtures nevertheless are functionally relevant... + "FBT", # Don't care about booleans as positional arguments in tests, e.g. via @pytest.mark.parametrize() + "PLR2004", # Magic value used in comparison, ... + "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes +] +"*/__init__.py" = ["F401"] diff --git a/tests/test_integrity.py b/tests/test_integrity.py new file mode 100644 index 0000000..cd8a7dc --- /dev/null +++ b/tests/test_integrity.py @@ -0,0 +1,17 @@ +import lamindb_setup as ln_setup +import pytest + + +@pytest.fixture(scope="module") +def setup_instance(): # module-scoped fixture shared by both tests below + ln_setup.init(storage="./testdb", schema="cellregistry") # set up with the cellregistry schema + yield # tests using this fixture run here + ln_setup.delete("testdb", force=True) # teardown after the module's tests + + +def test_migrate_check(setup_instance): + assert ln_setup.migrate.check() # passes only if check() returns a truthy value + + +def test_system_check(setup_instance): + ln_setup.django("check") # no assert: relies on the call raising on failure (presumably) — confirm