diff --git a/LICENSE b/LICENSE
index 32686c9a9..b41205876 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2021 Sage Bionetworks
+Copyright (c) 2024 Sage Bionetworks
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index 6423c36d9..e1767f25d 100644
--- a/README.md
+++ b/README.md
@@ -553,12 +553,23 @@ Expired messages when the application attempts to export telemetry data.
 
 # Contributors
 
-Main contributors and developers:
+Sage main contributors and developers:
+
+- [Gianna Jordan](https://github.com/giajordan)
+- [Lingling Peng](https://github.com/linglp)
+- [Bryan Fauble](https://github.com/BryanFauble)
+- [Andrew Lamb](https://github.com/andrewelamb)
+- [Brad Macdonald](https://github.com/BWMac)
 - [Milen Nikolov](https://github.com/milen-sage)
+
+## Alumni
 - [Mialy DeFelice](https://github.com/mialy-defelice)
 - [Sujay Patil](https://github.com/sujaypatil96)
 - [Bruno Grande](https://github.com/BrunoGrandePhD)
-- [Robert Allaway](https://github.com/allaway)
-- [Gianna Jordan](https://github.com/giajordan)
-- [Lingling Peng](https://github.com/linglp)
+- [Jason Hwee](https://github.com/hweej)
+- [Xengie Doan](https://github.com/xdoan)
+- [James Eddy](https://github.com/jaeddy)
+- [Yooree Chae](https://github.com/ychae)
+
+See all [contributors](https://github.com/Sage-Bionetworks/schematic/graphs/contributors)
diff --git a/docs/source/asset_store.rst b/docs/source/asset_store.rst
new file mode 100644
index 000000000..80cbd008b
--- /dev/null
+++ b/docs/source/asset_store.rst
@@ -0,0 +1,138 @@
+Setting up your asset store
+===========================
+
+.. note::
+
+   You can ignore this section if you are just trying to contribute manifests.
+
+This document covers the minimal recommended elements needed in Synapse to interface with the Data Curator App (DCA) and provides options for Synapse project layout.
+
+There are two options for setting up a DCC Synapse project:
+
+1. **Distributed Projects**: Each team of DCC contributors has its own Synapse project that stores the team's datasets.
+2. **Single Project**: All DCC datasets are stored in the same Synapse project.
+
+In each of these project setups, there are two ways you can lay out your data:
+
+1. **Flat Data Layout**: All top level folders sit directly under the project
+
+   .. code-block:: shell
+
+       my_flat_project
+       ├── biospecimen
+       └── clinical
+
+2. **Hierarchical Data Layout**: Top level folders are stored within nested folders annotated with ``contentType: dataset``
+
+   .. note::
+
+      This requires you to add the column ``contentType`` to your fileview schema.
+
+   .. code-block:: shell
+
+       my_hierarchical_project
+       ├── biospecimen
+       │   ├── experiment_1 <- annotated
+       │   └── experiment_2 <- annotated
+       └── clinical
+           ├── batch_1 <- annotated
+           └── batch_2 <- annotated
+
+
+Option 1: Distributed Synapse Projects
+--------------------------------------
+
+Pick **option 1** if you answer "yes" to one or more of the following questions:
+
+- Does the DCC have multiple contributing institutions/labs, each with different data governance and access controls?
+- Does the DCC have multiple institutions with limited cross-institutional sharing?
+- Will contributors submit more than 100 datasets per release or per month?
+- Are you unwilling to annotate each DCC dataset folder with the annotation ``contentType: dataset``? (A sketch of this annotation step follows below.)
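+
+If you do use dataset folders annotated with ``contentType: dataset``, the annotation can be applied
+through the Synapse web UI or programmatically. Below is a minimal sketch using the ``synapseclient``
+Python package; the folder ID ``syn123`` is a hypothetical placeholder.
+
+.. code-block:: python
+
+    import synapseclient
+
+    # Log in with credentials from your .synapseConfig
+    syn = synapseclient.login()
+
+    # Fetch the folder's annotations, add contentType, and store them back
+    annotations = syn.get_annotations("syn123")  # hypothetical dataset folder ID
+    annotations["contentType"] = "dataset"
+    syn.set_annotations(annotations)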
+
+Access & Project Setup - Multiple Contributing Projects
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+1. Create a DCC Admin Team with admin permissions.
+2. Create a Team for each data contributing institution. Begin with a "Test Team" if all teams are not yet identified.
+3. Create a Synapse Project for each institution and grant the respective team **Edit** level access.
+
+   - E.g., for institutions A, B, and C, create Projects A, B, and C with Teams A, B, and C. Team A has **Edit** access to Project A, etc.
+
+4. Within each project, create "top level folders" in the **Files** tab for each dataset type.
+5. Create another Synapse Project (e.g., MyDCC) containing the main **Fileview** that includes all the DCC projects in its scope.
+
+   - Ensure all teams have **Download** level access to this file view.
+   - Include both file and folder entities and add **ALL default columns**.
+
+.. note::
+
+   If you want to upload data according to the hierarchical data layout, you can still use
+   distributed projects; just add the ``contentType`` column to your fileview, and annotate
+   your top level folders with ``contentType: dataset``.
+
+
+Option 2: Single Synapse Project
+--------------------------------
+
+Pick **option 2** if you don't select option 1 and you answer "yes" to any of these questions:
+
+- Does the DCC have a project with pre-existing datasets in a complex folder hierarchy?
+- Does the DCC envision collaboration on the same dataset collection across multiple teams with shared access controls?
+- Are you willing to set up local access control for each dataset folder and annotate each with ``contentType: dataset``?
+
+If neither option fits, select option 1.
+
+
+Access & Project Setup - Single Contributing Project
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+1. Create a Team for each data contributing institution.
+2. Create a single Synapse Project (e.g., MyDCC).
+3. Within this project, create dataset folders for each contributor. Organize them as needed.
+
+   - Annotate each top level folder with ``contentType: dataset``. These folders must not be nested inside other dataset folders and must have unique names.
+     Taking the above example, you cannot have something like this:
+
+     .. code-block:: shell
+
+        my_hierarchical_project
+        ├── biospecimen
+        │   ├── experiment_1 <- annotated
+        │   └── experiment_2 <- annotated
+        └── clinical
+            ├── experiment_1 <- this is not allowed, because experiment_1 is duplicated
+            └── batch_2 <- annotated
+
+4. In MyDCC, create the main **DCC Fileview** with `MyDCC` as the scope. Add the column ``contentType`` to the schema and grant teams **Download** level access (see the sketch at the end of this section).
+
+   - Ensure all teams have **Download** level access to this file view.
+   - Add both file and folder entities and add **ALL default columns**.
+
+.. note::
+
+   You can technically use the flat data layout with a single project setup, but it is not recommended:
+   if different data contributors submit similar data types, you end up with a proliferation of folders
+   per contributor and data type.
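+
+Creating the file view can also be scripted. The following is a minimal sketch using the
+``synapseclient`` Python package, assuming a hypothetical project ID ``syn000``; adjust the
+name, scope, and columns to your setup.
+
+.. code-block:: python
+
+    import synapseclient
+    from synapseclient import Column, EntityViewSchema, EntityViewType
+
+    syn = synapseclient.login()
+
+    # File view over files and folders, with all default columns plus the
+    # extra contentType column used by the hierarchical data layout
+    view = EntityViewSchema(
+        name="DCC Fileview",
+        parent="syn000",  # hypothetical MyDCC project ID
+        scopes=["syn000"],
+        includeEntityTypes=[EntityViewType.FILE, EntityViewType.FOLDER],
+        addDefaultViewColumns=True,
+        columns=[Column(name="contentType", columnType="STRING")],
+    )
+    view = syn.store(view)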
+
+Synapse External Cloud Buckets Setup
+------------------------------------
+
+If DCC contributors require external cloud buckets, select one of the following configurations. For more information on how to
+set this up on Synapse, view this documentation: https://help.synapse.org/docs/Custom-Storage-Locations.2048327803.html
+
+1. **Basic External Storage Bucket (Default)**:
+
+   - Create an S3 bucket for Synapse uploads via the web UI or CLI. Contributors will upload data without needing AWS credentials.
+   - Provision an S3 bucket, attach it to the Synapse project, and create folders for specific assay types.
+
+2. **Custom Storage Location**:
+
+   This is an advanced setup for users who do not want to upload files directly via the Synapse API, but rather
+   create pointers to the data.
+
+   - For large datasets or if contributors prefer cloud storage, enable uploads via the AWS CLI or GCP CLI.
+   - Configure the custom storage location with an AWS Lambda or Google Cloud function for syncing.
+   - If using AWS, provision a bucket, set up Lambda sync, and assign IAM write access.
+   - For GCP, use Google Cloud function sync and obtain contributor emails for access.
+
+Finally, set up a `synapse-service-lambda` account for syncing external cloud buckets with Synapse, granting "Edit & Delete" permissions on the contributor's project.
diff --git a/docs/source/cli_reference.rst b/docs/source/cli_reference.rst
index a2bd78cd2..83cda3e2c 100644
--- a/docs/source/cli_reference.rst
+++ b/docs/source/cli_reference.rst
@@ -2,6 +2,45 @@
 CLI Reference
 =============
 
+When using this tool, the ``-d`` flag refers to the Synapse ID of a folder found under the **Files** tab
+that contains a manifest and data (a "Top Level Folder"). Providing a ``dataset_id`` is not required,
+but if you are pulling existing annotations with the ``-a`` flag and the manifest is file-based,
+you must provide a ``dataset_id``.
+
+
+Generate a new manifest as a Google Sheet
+-----------------------------------------
+
+.. code-block:: shell
+
+   schematic manifest -c /path/to/config.yml get -dt <data_type> -s
+
+Generate an existing manifest from Synapse
+------------------------------------------
+
+.. code-block:: shell
+
+   schematic manifest -c /path/to/config.yml get -dt <data_type> -d <dataset_id> -s
+
+Validate a manifest
+-------------------
+
+.. code-block:: shell
+
+   schematic model -c /path/to/config.yml validate -dt <data_type> -mp <manifest_path>
+
+Submit a manifest as a file
+---------------------------
+
+.. code-block:: shell
+
+   schematic model -c /path/to/config.yml submit -mp <manifest_path> -d <dataset_id> -vc <validate_component> -mrt file_only
+
+
+In depth guide
+--------------
+
 .. click:: schematic.__main__:main
    :prog: schematic
    :nested: full
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 677de60a5..5749c5f45 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -13,6 +13,8 @@
 import os
 import sys
 
+import sphinx_rtd_theme
+
 file_dir = os.path.dirname(__file__)
 sys.path.append(file_dir)
 import pathlib
@@ -27,7 +29,7 @@ toml_metadata = _parse_toml(toml_file_path)
 
 project = toml_metadata["name"]
 
-copyright = "2022, Sage Bionetworks"
+copyright = "2024, Sage Bionetworks"
 
 author = toml_metadata["authors"]
 
@@ -40,7 +42,7 @@
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = ["sphinx_click"]
+extensions = ["sphinx_click", "sphinx_rtd_theme"]
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]
@@ -57,15 +59,21 @@
 # This pattern also affects html_static_path and html_extra_path.
 exclude_patterns = []
 
+# The master toctree document.
+master_doc = "index"
 
 # -- Options for HTML output -------------------------------------------------
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-html_theme = "alabaster"
+html_theme = "sphinx_rtd_theme"
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ["_static"]
+
+html_theme_options = {
+    "collapse_navigation": False,
+}
diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst
new file mode 100644
index 000000000..5ab4111ec
--- /dev/null
+++ b/docs/source/configuration.rst
@@ -0,0 +1,84 @@
+Configure Schematic
+===================
+
+This is an example config for Schematic. All listed values are the defaults used when no config is provided.
+Remove any fields you don't want to change; if you remove every field in a section, remove the entire section,
+including its header. Change the values of any fields you do want to change. Please see the installation section
+for details on how to set some of this up.
+
+.. code-block:: yaml
+
+    # This describes where assets such as manifests are stored
+    asset_store:
+      # This is when assets are stored in a synapse project
+      synapse:
+        # Synapse ID of the file view listing all project data assets.
+        master_fileview_id: "syn23643253"
+        # Path to the synapse config file, either absolute or relative to this file
+        config: ".synapseConfig"
+        # Base name that manifest files will be saved as
+        manifest_basename: "synapse_storage_manifest"
+
+    # This describes information about manifests as it relates to generation and validation
+    manifest:
+      # Location where manifests will be saved to
+      manifest_folder: "manifests"
+      # Title or title prefix given to generated manifest(s)
+      title: "example"
+      # Data types of manifests to be generated or data type (singular) to validate manifest against
+      data_type:
+        - "Biospecimen"
+        - "Patient"
+
+    # Describes the location of your schema
+    model:
+      # Location of your schema jsonld, it must be a path relative to this file or absolute
+      location: "tests/data/example.model.jsonld"
+
+    # This section is for using Google Sheets with Schematic
+    google_sheets:
+      # Path to the google service account creds, either absolute or relative to this file
+      service_acct_creds: "schematic_service_account_creds.json"
+      # When doing Google Sheet validation (regex match) with the validation rules:
+      # true alerts the user and does not allow entry of bad values;
+      # false warns but allows the entry onto the sheet.
+      strict_validation: true
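+
+Because trimmed-down configs are encouraged, a quick way to make sure your edited ``config.yml``
+still parses as valid YAML is to load it in Python. This is just an optional sanity check, assuming
+``pyyaml`` is available in your environment and the file is in your working directory.
+
+.. code-block:: python
+
+    import yaml
+
+    with open("config.yml", "r", encoding="utf-8") as f:
+        config = yaml.safe_load(f)
+
+    # Sections you removed simply won't be present; schematic falls back to defaults
+    print(config.get("model", {}).get("location"))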
+
+This document describes in detail what each of these configuration options means.
+
+Asset Store
+-----------
+
+Synapse
+~~~~~~~
+This section describes where assets such as manifests are stored and how the asset store is configured.
+
+* master_fileview_id: Synapse ID of the file view listing all project data assets.
+* config: Path to the synapse config file, either absolute or relative to this file. Note: if you use the ``synapse config`` command, you will have to provide the full path to the configuration file.
+* manifest_basename: Base name that manifest files will be saved as on Synapse. The Component will be appended to it, for example: `synapse_storage_manifest_biospecimen.csv`
+
+Manifest
+--------
+This describes information about manifests as it relates to generation and validation. Note: some of these configurations can be overwritten by the CLI commands.
+
+* manifest_folder: Location where manifests will be saved to. This can be a relative or absolute path on your local machine.
+* title: Title or title prefix given to generated manifest(s). This is used to name the manifest file saved locally.
+* data_type: Data types of manifests to be generated, or the data type (singular) to validate a manifest against. If you want all available manifests, input "all manifests".
+
+
+Model
+-----
+Describes the location of your schema.
+
+* location: This is the location of your schema jsonld; it must be a path relative to this file or an absolute path. Currently URLs are NOT supported, so you will have to download the jsonld data model. Here is an example: https://raw.githubusercontent.com/ncihtan/data-models/v24.9.1/HTAN.model.jsonld
+
+Google Sheets
+-------------
+Schematic leverages the Google API to generate manifests. This section is for using Google Sheets with Schematic.
+
+* service_acct_creds: Path to the Google service account creds, either absolute or relative to this file. This is the path to the service account credentials file that you download from Google Cloud Platform.
+* strict_validation: Controls Google Sheet validation (regex match) behavior for the validation rules.
+
+  * True alerts the user and does not allow entry of bad values.
+  * False warns but allows the entry onto the sheet.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 2d235a77a..9b535d881 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -6,7 +6,145 @@
 Welcome to Schematic's documentation!
 =====================================
 
+.. warning::
+   This documentation site is a work in progress, and the sublinks may change. Apologies for the inconvenience.
+
+
+**SCHEMATIC** is an acronym for *Schema Engine for Manifest Ingress and Curation*.
+This Python-based infrastructure provides a *novel* schema-based, metadata ingress ecosystem,
+which is meant to streamline the process of biomedical dataset annotation, metadata validation,
+and submission to a data repository for various data contributors. The tool is recommended for
+use as a command line tool (CLI) and as an API.
+
+Schematic tackles these goals:
+
+- Ensure that the highest quality structured data and metadata are contributed to Synapse.
+- Provide Excel templates that correspond to a data model and can be filled out by data contributors.
+- Visualize and manage data models and their relationships with each other.
+
+.. contents::
+   :depth: 2
+   :local:
+
+Important Concepts
+------------------
+
+.. important::
+
+   Before reading more about Schematic, review this section; it covers essential concepts for using the tool effectively.
+
+Synapse FileViews
+~~~~~~~~~~~~~~~~~
+Users are responsible for setting up a **FileView** that integrates with Schematic. Note that FileViews appear under the "Tables" tab in Synapse and can be named according to the project's needs. For instance, a FileView for **Project A** could have a different name than a FileView for **Project B**.
+
+For more information on Synapse projects, visit:
+
+- `Synapse projects `_
+- `Synapse annotations `_
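+
+As a quick illustration, once your FileView exists you can preview what Schematic will see by
+querying it with the ``synapseclient`` Python package. This is an optional sketch; ``syn23643253``
+is the example ``master_fileview_id`` from the configuration page, so substitute your own view ID.
+
+.. code-block:: python
+
+    import synapseclient
+
+    syn = synapseclient.login()
+
+    # Peek at the first few rows of the master file view (requires pandas)
+    results = syn.tableQuery("SELECT id, name FROM syn23643253 LIMIT 5")
+    print(results.asDataFrame())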
+
+Synapse Folders
+~~~~~~~~~~~~~~~
+
+Folders in Synapse allow users to organize data within projects. More details on uploading and organizing data can be found at `Synapse folders `_
+
+Synapse Datasets
+~~~~~~~~~~~~~~~~
+
+This is an object in Synapse which appears under the "Dataset" tab and represents a user-defined collection of Synapse files and versions: https://help.synapse.org/docs/Datasets.2611281979.html
+
+JSON-LD
+~~~~~~~
+JSON-LD is a lightweight Linked Data format. The usage of JSON-LD to capture our data models
+extends beyond the creation, validation, and submission of annotations/manifests into Synapse.
+It can create relationships between different data models and, in the future, drive
+transformation of data from one data model to another. Visualization of these data models
+and their relationships is also possible, which allows the community to see the depth of
+connections between all the data uploaded into Synapse.
+
+Manifest
+~~~~~~~~
+
+A manifest is a structured file that contains metadata about files under a "top level folder".
+The metadata includes information about the files, such as their data type.
+The manifest can also be used to annotate the data on Synapse and create a file view
+that enables the FAIR principles on each of the files in the "top level folder".
+
+Component/Data type
+~~~~~~~~~~~~~~~~~~~
+"Component" and "data type" are used interchangeably. The component/data type is determined from the specified JSON-LD data model.
+If the string "Component" exists in the ``DependsOn`` column, the "Attribute" value in that row is a data type.
+Examples of data types are "Biospecimen" and "Patient": https://github.com/Sage-Bionetworks/schematic/blob/develop/tests/data/example.model.csv#L3.
+Each data type/component has its own manifest template with different columns.
+
+Project Data Layout
+~~~~~~~~~~~~~~~~~~~
+
+Regardless of data layout, the data in your Synapse Project(s) are uploaded into Synapse Folders to be curated and annotated by schematic.
+In both layouts listed below, the project administrators along with the data contributors may have preferences on how the
+data is organized. The organization of your data is specified with the "Component / data type" attribute of your data model and
+acts as a logical grouping for your data. Schematic has a concept of a ``dataset`` (a parameter for the API/library/CLI), but this means
+different things under these two layouts.
+
+* **Hierarchical**: The "dataset" parameter under this data layout refers to any folder annotated with ``contentType: dataset``.
+* **Flat**: The "dataset" parameter under this data layout refers to what are often called "top level folders".
+
+In both of these layouts, these are really just groupings of resources.
+
+
+Schematic services
+------------------
+
+The following are the four main endpoints that assist with the high-level goals outlined above, with additional goals to come.
+
+Manifest Generation
+~~~~~~~~~~~~~~~~~~~
+
+Provides a manifest template for users for a particular project or data type. If a project with annotations already exists, a semi-filled-out template is provided to the user so that they do not start from scratch. If there are no existing annotations, an empty manifest template is provided.
+
+Manifest Validation
+~~~~~~~~~~~~~~~~~~~
+
+Given a filled-out manifest:
+
+- The manifest is validated against the JSON-LD schema as it maps to Great Expectations (GX) rules.
+- A ``jsonschema`` is generated from the data model. The data model can be in CSV or JSON-LD format, as input formats are decoupled from the internal data model representation within Schematic (see the sketch after this list).
+- A set of validation rules is defined in the data model. Some validation rules are implemented via GX; others are custom Python code. All validation rules have the same interface.
+- Certain GX rules require looping through all projects a user has access to, or a specified scope of projects, to find other projects with manifests.
+- Validation results are provided before the manifest file is uploaded into Synapse.
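+
+To make the ``jsonschema`` step concrete, here is a deliberately simplified, hypothetical sketch of
+the idea: a JSON Schema derived from a data model is used to validate one manifest row. This is not
+Schematic's actual generated schema, just an illustration of the mechanism, using the ``jsonschema``
+Python package.
+
+.. code-block:: python
+
+    import jsonschema
+
+    # Hypothetical, simplified schema for one row of a Biospecimen manifest
+    schema = {
+        "type": "object",
+        "properties": {
+            "Sample ID": {"type": "string"},
+            "Component": {"const": "Biospecimen"},
+        },
+        "required": ["Sample ID", "Component"],
+    }
+
+    row = {"Sample ID": "S1", "Component": "Biospecimen"}
+    jsonschema.validate(instance=row, schema=schema)  # raises ValidationError on bad rows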
+
+Manifest Submission
+~~~~~~~~~~~~~~~~~~~
+
+Given a filled-out manifest, this will allow you to submit the manifest to the "top level folder".
+This validates the manifest and...
+
+- If the manifest is invalid, error messages will be returned.
+- If the manifest is valid:
+
+  - Stores the manifest in Synapse.
+  - Uploads the manifest as a Synapse File, Annotations on Files, and/or a Synapse Table.
+
+More validation documentation can be found here: https://sagebionetworks.jira.com/wiki/spaces/SCHEM/pages/3302785036/Schematic+Validation
+
+Data Model Visualization
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+These endpoints allow you to visualize your data models and their relationships with each other.
+
+
+API reference
+-------------
+
+For the entire Python API reference documentation, you can visit the docs here: https://sage-bionetworks.github.io/schematic/
+
 .. toctree::
    :maxdepth: 1
+   :hidden:
 
+   installation
+   asset_store
+   configuration
+   tutorials
+   troubleshooting
    cli_reference
diff --git a/docs/source/installation.rst b/docs/source/installation.rst
new file mode 100644
index 000000000..0e38f3894
--- /dev/null
+++ b/docs/source/installation.rst
@@ -0,0 +1,306 @@
+Installation
+============
+
+Installation Requirements
+-------------------------
+
+- Your installed Python version must satisfy 3.9.0 ≤ version < 3.11.0
+- You need to be a registered and certified user on `synapse.org `_
+
+.. note::
+   To create Google Sheets files from Schematic, please follow our credential policy for Google credentials. You can find a detailed tutorial in the `Google Credentials Guide `_.
+   If you're using ``config.yml``, make sure to specify the path to ``schematic_service_account_creds.json`` (see the ``google_sheets > service_acct_creds`` section for more information).
+
+Installation Guide For: Users
+-----------------------------
+
+The instructions below assume you have already installed `python `_, with the release version meeting the constraints set in the `Installation Requirements`_ section, and do not have a Python environment already active.
+
+1. Verify your python version
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Ensure your python version meets the requirements from the `Installation Requirements`_ section using the following command:
+
+.. code-block:: shell
+
+   python3 --version
+
+If your current Python version is not supported by Schematic, you can switch to a supported version using a tool like `pyenv `_. Follow the instructions in the pyenv documentation to install and switch between Python versions easily.
+
+.. note::
+   You can double-check the currently supported python versions by opening the `pyproject.toml `_ file in this repository and finding the supported versions of python listed there.
+
+2. Set up your virtual environment
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Once you are working with a python version supported by `schematic`, you will need to activate a virtual environment within which you can install the package. Below we will show how to create your virtual environment either with ``venv`` or with ``conda``.
+
+2a. Set up your virtual environment with ``venv``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Python 3 has built-in support for virtual environments with the ``venv`` module, so you no longer need to install ``virtualenv``:
+
+.. code-block:: shell
+
+   python3 -m venv .venv
+   source .venv/bin/activate
+
+2b. Set up your virtual environment with ``conda``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``conda`` is a powerful package and environment management tool that allows users to create isolated environments, used particularly in data science and machine learning workflows. If you would like to manage your environments with ``conda``, continue reading:
+
+1. **Download your preferred conda installer**: Begin by `installing conda `_. We personally recommend working with Miniconda, which is a lightweight installer for ``conda`` that includes only ``conda`` and its dependencies.
+2. **Execute the conda installer**: Once you have downloaded your preferred installer, execute it using ``bash`` or ``zsh``, depending on the shell configured for your terminal environment. For example:
+
+   .. code-block:: shell
+
+      bash Miniconda3-latest-MacOSX-arm64.sh
+
+3. **Verify your conda setup**: Follow the prompts to complete your setup. Then verify your setup by running the ``conda`` command.
+4. **Create your schematic environment**: Begin by creating a fresh ``conda`` environment for ``schematic`` like so:
+
+   .. code-block:: shell
+
+      conda create --name 'schematicpy' python=3.10
+
+5. **Activate the environment**: Once your environment is set up, you can now activate your new environment with ``conda``:
+
+   .. code-block:: shell
+
+      conda activate schematicpy
+
+3. Install ``schematic`` dependencies
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Install the package using `pip `_:
+
+.. code-block:: shell
+
+   python3 -m pip install schematicpy
+
+If you run into ``ERROR: Failed building wheel for numpy``, the error may be resolved by upgrading pip:
+
+.. code-block:: shell
+
+   pip3 install --upgrade pip
+
+4. Get your data model as a ``JSON-LD`` schema file
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Now you need a schema file, e.g. ``model.jsonld``, to have a data model that schematic can work with. While you can download a very basic `example data model `_, you'll probably be working with a DCC-specific data model. For non-Sage employees/contributors using the CLI, you might care only about the minimum needed artifact, which is the ``.jsonld``; locate and download only that from the right repo.
+
+Here are some example repos with schema files:
+
+- https://github.com/ncihtan/data-models/
+- https://github.com/nf-osi/nf-metadata-dictionary/
+
+5. Obtain Google credential files
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Any function that interacts with a Google Sheet (such as ``schematic manifest get``) requires Google Cloud credentials.
+
+1. **Option 1**: Follow the `step-by-step `_ guide on how to create these credentials in Google Cloud.
+
+   - Depending on your institution's policies, your institutional Google account may or may not have the required permissions to complete this. A possible workaround is to use a personal or temporary Google account.
+
+.. warning::
+   At the time of writing, Sage Bionetworks employees do not have the appropriate permissions to create projects with their Sage Bionetworks Google accounts. You will need to follow the instructions using a personal Google account.
+
+2. **Option 2**: Ask your DCC/development team if they have credentials previously set up with a service account.
+
+Once you have obtained credentials, ensure that the generated JSON file is named to match the ``service_acct_creds`` parameter in your ``config.yml`` file. You will find more context on the ``config.yml`` in the `6. Set up configuration files`_ section.
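+
+If you want to confirm the credentials file is usable before running schematic, this optional sketch
+loads it with the ``google-auth`` package (assumed to be available in your environment) and prints
+the service account email; the filename assumes the default ``service_acct_creds`` value.
+
+.. code-block:: python
+
+    from google.oauth2 import service_account
+
+    # Load the service account key file and request a Sheets scope
+    creds = service_account.Credentials.from_service_account_file(
+        "schematic_service_account_creds.json",
+        scopes=["https://www.googleapis.com/auth/spreadsheets"],
+    )
+    print(creds.service_account_email)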
+
+.. note::
+   Running ``schematic init`` is no longer supported due to security concerns. To obtain ``schematic_service_account_creds.json``, please follow the `instructions `_. Schematic uses Google's API to generate Google Sheet templates that users fill in to provide (meta)data. Most Google Sheet functionality can be authenticated with a service account. However, more complex Google Sheet functionality requires token-based authentication. As browser support for token-based authentication diminishes, we hope to deprecate token-based authentication and keep only service account authentication in the future.
+
+.. note::
+   Use the ``schematic_service_account_creds.json`` file for the service account mode of authentication (*for Google services/APIs*). Service accounts are special Google accounts that can be used by applications to access Google APIs programmatically via OAuth2.0, with the advantage being that they do not require human authorization.
+
+6. Set up configuration files
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following section will walk through setting up your configuration files with your credentials to allow for communication between ``schematic`` and the Synapse API.
+
+There are two main configuration files that need to be created and modified:
+
+- ``.synapseConfig``
+- ``config.yml``
+
+**Create and modify the** ``.synapseConfig``
+
+The ``.synapseConfig`` file is what enables communication between ``schematic`` and the Synapse API using your credentials. You can automatically generate a ``.synapseConfig`` file by running the following in your command line and following the prompts.
+
+.. tip::
+   You can generate a new authentication token on the Synapse website by going to ``Account Settings`` > ``Personal Access Tokens``.
+
+.. code-block:: shell
+
+   synapse config
+
+After following the prompts, a new ``.synapseConfig`` file and ``.synapseCache`` folder will be created in your home directory. You can view these hidden assets in your home directory with the following command:
+
+.. code-block:: shell
+
+   ls -a ~
+
+The ``.synapseConfig`` is used to log into Synapse if you are not using an environment variable (i.e. ``SYNAPSE_ACCESS_TOKEN``) for authentication, and the ``.synapseCache`` is where your assets are stored if you are not working with the CLI and/or you have specified ``.synapseCache`` as the location in which to store your manifests in your ``config.yml``.
+
+**Create and modify the** ``config.yml``
+
+In this repository there is a ``config_example.yml`` file with default configurations for various components that are required before running ``schematic``, such as the Synapse ID of the main file view containing all your project assets, the base name of your manifest files, etc.
+Copy ``config_example.yml`` (located in the base directory of the repository) to a new file called ``config.yml`` and modify its contents according to your use case; see the Configuration page for what each field means.
+
+Installation Guide For: Developers
+----------------------------------
+
+.. note::
+   This section is for people developing on Schematic only.
+
+The instructions below assume you have already installed `python `_, with the release version meeting the constraints set in the `Installation Requirements`_ section, and do not have an environment already active (e.g., with ``pyenv``). For development, we recommend working with Python versions above 3.9 to avoid issues with ``pre-commit``'s default hook configuration.
+
+When contributing to this repository, please first discuss the change you wish to make via the `service desk `_ so that we may track these changes.
+
+Once you have finished setting up your development environment using the instructions below, please follow the guidelines in `CONTRIBUTION.md `_ during your development.
+
+Please note we have a `code of conduct `_; please follow it in all your interactions with the project.
+
+1. Clone the ``schematic`` package repository
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For development, you will be working with the latest version of ``schematic`` on the repository to ensure compatibility between its latest state and your changes. Ensure your current working directory is where you would like to store your local fork before running the following command:
+
+.. code-block:: shell
+
+   git clone https://github.com/Sage-Bionetworks/schematic.git
+
+2. Install ``poetry``
+~~~~~~~~~~~~~~~~~~~~~
+
+Install ``poetry`` (version 1.3.0 or later) using either the `official installer `_ or ``pip``. If you have an older installation of Poetry, we recommend uninstalling it first.
+
+.. code-block:: shell
+
+   pip install poetry
+
+Check to make sure your version of poetry is v1.3.0 or later:
+
+.. code-block:: shell
+
+   poetry --version
+
+3. Start the virtual environment
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Change directory (``cd``) into your cloned ``schematic`` repository, and initialize the virtual environment using the following command with ``poetry``:
+
+.. code-block:: shell
+
+   poetry shell
+
+To make sure your poetry version and python version are consistent with the versions you expect, you can run the following command:
+
+.. code-block:: shell
+
+   poetry debug info
+
+4. Install ``schematic`` dependencies
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Before you begin, make sure you are in the latest ``develop`` branch of the repository.
+
+The following command will install the dependencies based on what we specify in the ``poetry.lock`` file of this repository (which is generated from the libraries listed in the ``pyproject.toml`` file). If this step is taking a long time, go back to Step 2 and check your version of ``poetry``. Alternatively, you can try deleting the lock file and regenerating it by running ``poetry lock`` (note: this method should be used as a last resort because it may force other developers to change their development environment).
+
+.. code-block:: shell
+
+   poetry install --with dev,doc
+
+This command will install:
+
+- The main dependencies required for running the package.
+- Development dependencies for testing, linting, and code formatting.
+- Documentation dependencies such as ``sphinx`` for building and maintaining documentation.
+
+5. Set up configuration files
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following section will walk through setting up your configuration files with your credentials to allow for communication between ``schematic`` and the Synapse API.
+
+There are two main configuration files that need to be created and modified:
+
+- ``.synapseConfig``
+- ``config.yml``
+
+**Create and modify the** ``.synapseConfig``
+
+The ``.synapseConfig`` file is what enables communication between ``schematic`` and the Synapse API using your credentials. You can automatically generate a ``.synapseConfig`` file by running the following in your command line and following the prompts.
+
+.. tip::
+   You can generate a new authentication token on the Synapse website by going to ``Account Settings`` > ``Personal Access Tokens``.
+
+.. code-block:: shell
+
+   synapse config
+
+After following the prompts, a new ``.synapseConfig`` file and ``.synapseCache`` folder will be created in your home directory. You can view these hidden assets in your home directory with the following command:
+
+.. code-block:: shell
+
+   ls -a ~
+
+The ``.synapseConfig`` is used to log into Synapse if you are not using an environment variable (i.e., ``SYNAPSE_ACCESS_TOKEN``) for authentication, and the ``.synapseCache`` is where your assets are stored if you are not working with the CLI and/or you have specified ``.synapseCache`` as the location to store your manifests in your ``config.yml``.
+
+.. important::
+   When developing on ``schematic``, keep your ``.synapseConfig`` in your current working directory to avoid authentication errors.
+
+**Create and modify the** ``config.yml``
+
+In this repository, there is a ``config_example.yml`` file with default configurations for various components required before running ``schematic``, such as the Synapse ID of the main file view containing all your project assets, the base name of your manifest files, etc.
+
+Copy the contents of the ``config_example.yml`` (located in the base directory of the cloned ``schematic`` repo) into a new file called ``config.yml``:
+
+.. code-block:: shell
+
+   cp config_example.yml config.yml
+
+Once you've copied the file, modify its contents according to your use case. For example, if you wanted to change the folder where manifests are downloaded, your config should look like:
+
+.. code-block:: text
+
+   manifest:
+     manifest_folder: "my_manifest_folder_path"
+
+.. important::
+   Be sure to update your ``config.yml`` with the location of the ``.synapseConfig`` created in the step above to avoid authentication errors. Paths can be specified relative to the ``config.yml`` file or as absolute paths.
+   By default, the ``.synapseConfig`` file is created in your home directory, so the configuration file will have to contain `/full/path/to/.synapseConfig` as the path to the ``.synapseConfig`` file, or the ``.synapseConfig`` must be in the same directory as the ``config.yml`` file.
+
+.. note::
+   ``config.yml`` is ignored by git.
+
+6. Obtain Google credential files
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Any function that interacts with a Google Sheet (such as ``schematic manifest get``) requires Google Cloud credentials.
+
+1. **Option 1**: Follow the step-by-step `guide `_ on how to create these credentials in Google Cloud.
+
+   - Depending on your institution's policies, your institutional Google account may or may not have the required permissions to complete this. A possible workaround is to use a personal or temporary Google account.
+
+.. warning::
+   At the time of writing, Sage Bionetworks employees do not have the appropriate permissions to create projects with their Sage Bionetworks Google accounts. You will need to follow the instructions using a personal Google account.
+
+2. **Option 2**: Ask your DCC/development team if they have credentials previously set up with a service account.
+
+Once you have obtained credentials, ensure that the generated JSON file is named to match the ``service_acct_creds`` parameter in your ``config.yml`` file.
+
+.. important::
+   For testing, ensure there is no environment variable ``SCHEMATIC_SERVICE_ACCOUNT_CREDS``. Check the file ``.env`` to ensure this is not set. Also, verify that config files used for testing, such as ``config_example.yml``, do not contain ``service_acct_creds_synapse_id``.
+
+.. note::
+   Running ``schematic init`` is no longer supported due to security concerns. To obtain ``schematic_service_account_creds.json``, please follow the `instructions `_. Schematic uses Google's API to generate Google Sheet templates that users fill in to provide (meta)data.
+   Most Google Sheet functionality can be authenticated with a service account. However, more complex Google Sheet functionality requires token-based authentication. As browser support for token-based authentication diminishes, we hope to deprecate token-based authentication and keep only service account authentication in the future.
+
+.. note::
+   Use the ``schematic_service_account_creds.json`` file for the service account mode of authentication (*for Google services/APIs*). Service accounts are special Google accounts that can be used by applications to access Google APIs programmatically via OAuth2.0, with the advantage being that they do not require human authorization.
+
+
+7. Verify your setup
+~~~~~~~~~~~~~~~~~~~~
+
+After running the steps above, your setup is complete, and you can test it in a ``python`` instance or by running a command based on the examples above.
diff --git a/docs/source/troubleshooting.rst b/docs/source/troubleshooting.rst
new file mode 100644
index 000000000..3ffb16b1e
--- /dev/null
+++ b/docs/source/troubleshooting.rst
@@ -0,0 +1,98 @@
+Troubleshooting
+===============
+
+These are some common issues you may encounter when using schematic.
+
+Debugging
+---------
+Whether you are using DCA, the schematic API, or the schematic library/CLI, the following steps will walk you through debugging your issue.
+
+1. What was the command that caused the error?
+2. Is the error listed below?
+3. Did you follow the workflow outlined in the tutorials section under "Contributing your manifest with the CLI"?
+
+   1. If you are validating or submitting the manifest, how was the manifest initially generated? If manually and NOT using schematic, there may be errors.
+   2. If the manifest was generated by schematic, when was it generated? Did you download the previously submitted manifest from Synapse and modify it? Did you download it and resubmit it? Please run the manifest generate command again to get a fresh manifest.
+
+4. Create a GitHub issue or reach out to your respective DCC service desk. What is the schematic or DCA configuration used? Specifically, it is most important to capture the following:
+
+   1. `data_type`: This is the same as Component in the data model.
+   2. `master_fileview_id`: This is the Synapse ID of the file view listing all project data.
+   3. `data model url`: This is the link to your data model.
+   4. `dataset_id`: This is the "top level folder" (the folder annotated with ``contentType: dataset``).
+   5. What is the command or API call that you made? If you are using DCA, please provide the step at which you encountered the error (manifest generate, validate, submit, etc.)
+
+   .. code-block:: bash
+
+      schematic manifest -c /path/to/config.yml get -dt <data_type> -s
+      # OR (PLEASE REDACT YOUR BEARER TOKEN)
+      curl -X 'GET' \
+        'https://schematic.api.sagebionetworks.org/v1/manifest/generate?schema_url=https%3A%2F%2Fraw.githubusercontent.com%2Fnf-osi%2Fnf-metadata-dictionary%2Fv9.8.0%2FNF.jsonld&title=Example&data_type=EpigeneticsAssayTemplate&use_annotations=true&dataset_id=syn63305821&asset_view=syn16858331&output_format=google_sheet&strict_validation=true&data_model_labels=class_label' \
+        -H 'accept: application/json' ...
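+
+Before digging into the specific errors below, a quick pandas sketch like the following can catch a
+couple of the most common manifest problems up front (an empty ``Component`` column or a missing
+``entityId`` column). The manifest filename here is a hypothetical placeholder.
+
+.. code-block:: python
+
+    import pandas as pd
+
+    manifest = pd.read_csv("synapse_storage_manifest_biospecimen.csv")  # hypothetical path
+
+    # An empty Component column can cause "boolean value of NA is ambiguous" on submit
+    if "Component" not in manifest.columns or manifest["Component"].isna().all():
+        print("Component column is missing or empty; fill it in before validating")
+
+    # A missing entityId column can cause "KeyError: entityId" on generate
+    if "entityId" not in manifest.columns:
+        print("entityId column is missing; regenerate the manifest with schematic")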
+
+Manifest Submit: `RuntimeError: failed with SynapseHTTPError('400 Client Error: nan is not a valid Synapse ID.')`
+-----------------------------------------------------------------------------------------------------------------
+
+As of the 24.10.2 version of Schematic, we require the `Filename` column to contain the full path to the file on Synapse, including the project name.
+You will encounter this issue if you try to submit a manifest with wrong Filenames. For example, if a file in your project has the full path
+`my_project/my_folder/my_file.txt`, you will get this error for:
+
+* A missing full path (e.g. `my_file.txt`)
+* A wrong filename (e.g. `my_project/my_folder/wrong_file_name.txt`)
+* A wrong filepath (e.g. `my_project/wrong_folder/my_file.txt`)
+
+This is because we join the `Filename` column together with what's in Synapse to append the `entityId` column if it's missing.
+
+To fix: First check whether your "Top Level Folder" has a manifest with invalid values in the `Filename` column.
+If so, please generate a manifest with schematic, which should fix the Filenames, OR (the less preferred solution) manually update the Filenames to include the full path to each file and manually upload.
+
+
+Manifest Submit: `TypeError: boolean value of NA is ambiguous`
+--------------------------------------------------------------
+
+You may encounter this error if your manifest has a Component column that is empty. This may occur if the manifest in your "Top Level Folder"
+does not contain this column; during manifest generation, an empty column is created for you.
+
+To fix: Check if your manifest has an empty Component column. Fill out this column with the correct Component values and submit the manifest again.
+
+
+Manifest validation: `The submitted metadata does not contain all required column(s)`
+-------------------------------------------------------------------------------------
+
+The required columns are determined by the data model, but `Component` is treated as a required column even if it's not set that way in the data model.
+This is the validation error you may get if you don't have the `Component` column.
+
+To fix: Check whether your manifest is missing the Component column or other required columns. Add the `Component` column (and fill it out) along with any other required columns.
+
+
+Manifest validation: `The submitted metadata contains << 'string' >> in the Component column, but requested validation for << expected string >>`
+-------------------------------------------------------------------------------------------------------------------------------------------------
+
+You might get this validation error if the manifest has incorrect Component values. The validation rule uses the "display" value of what's expected in the Component column: for example, the display name could be "Imaging Assay"
+while the actual Component name is "ImagingAssayTemplate".
+
+To fix: Check if your manifest has invalid Component values and fill the column out correctly. Using the above example, fill out your Component column with "ImagingAssayTemplate".
+
+
+Manifest Generate: `KeyError: entityId`
+---------------------------------------
+
+Fixed: v24.12.1
+
+If the manifest currently in your "Top Level Folder" on Synapse has incorrect Filename values but does have an entityId column,
+you will be able to run manifest generate to create a new manifest with the corrected Filenames.
+However, if the manifest on Synapse does NOT have the entityId column, you will encounter this error.
+
+To fix: First check whether your "Top Level Folder" has a manifest without the entityId column.
+If so, you can either submit your manifest using schematic OR (the less preferred solution) manually add the entityId column to the manifest on Synapse.
+
+Manifest Generate: `ValueError: cannot insert eTag, already exists`
+-------------------------------------------------------------------
+
+Fixed: v24.11.2
+
+If you do NOT have a manifest in your "Top Level Folder" on Synapse, and the File entities in this folder are annotated with an 'eTag' key, manifest generation will fail.
+
+To fix: This should be fixed in schematic, but for now, remove the 'eTag' annotation from your files.
diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst
new file mode 100644
index 000000000..6ecc1f6ff
--- /dev/null
+++ b/docs/source/tutorials.rst
@@ -0,0 +1,95 @@
+Tutorials
+=========
+
+
+Contributing your manifest with the CLI
+---------------------------------------
+
+In this tutorial, you'll learn how to contribute your metadata manifests to Synapse using the `CLI`. Following best practices,
+we will cover generating, validating, and submitting your manifest in a structured workflow.
+
+.. note::
+
+   Whether you have submitted manifests to your "Top Level Folder" before (see important terminology) OR are submitting a new manifest, we **strongly recommend** that you follow this workflow.
+   If you deviate from this workflow or upload files to Synapse directly without using schematic, you risk the errors outlined in the
+   troubleshooting section of the documentation.
+
+   Question: What if I've already gone through this workflow? Can I download the manifest, modify it, and upload it to Synapse without Schematic?
+
+   Answer: Yes, but you risk running into errors when others use these commands.
+   Updates may have been made to the data model by the DCC, and these changes won't be reflected unless you regenerate your manifest.
+   We strongly recommend not doing that.
+
+
+Prerequisites
+~~~~~~~~~~~~~
+
+1. **Install and configure Schematic**: Ensure that you have installed `schematic` and set up its dependencies. See "Installation Guide For: Users" for more information.
+2. **Important Concepts**: Make sure you know the important concepts outlined on the home page of the doc site.
+3. **Configuration**: Read more in the Configuration section about each of the attributes in the configuration file.
+
+Steps to Contribute a Manifest
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The contribution process includes three main commands.
+For information about the parameters of each of these commands, please refer to the CLI Reference section.
+
+1. **Generate** a manifest to fill out
+2. **Validate** the manifest (optional, since it's included in submission)
+3. **Submit** the manifest to Synapse
+
+
+Step 1: Generate a Manifest
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The `schematic manifest get` command creates a manifest template based on a data model and existing manifests.
+
+.. note::
+
+   This step is crucial for ensuring that your manifest includes all the necessary columns and headers. As of v24.10.2, you will
+   want to generate the manifest to ensure the right Filenames are populated in your manifest. If you just uploaded data or files
+   to a folder, you may see that these files are missing or get a `LookUp` error. This is an artifact of Synapse
+   fileviews; please run this command again.
+
+.. code-block:: bash
+
+   schematic manifest -c /path/to/config.yml get -dt <data_type> -s
+
+- **Data Type**: The data type or schema model for your manifest (e.g., "Patient", "Biospecimen").
+
+This command will create a CSV file with the necessary columns and headers, which you can then fill with your metadata.
+
+Step 2: Validate the Manifest (Optional)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Though optional, ``schematic model validate`` is a useful step to ensure that your manifest meets the required standards before submission.
+It checks for any errors, such as missing or incorrectly formatted values.
+
+.. note::
+
+   If your manifest has an empty Component column, you will need to fill it out before validation.
+
+.. code-block:: bash
+
+   schematic model -c /path/to/config.yml validate -dt <data_type> -mp <manifest_path>
+
+If validation passes, you'll see a success message; if there are errors, `schematic` will list them. Correct any issues before proceeding to submission.
+
+Step 3: Submit the Manifest to Synapse
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The `schematic model submit` command uploads your manifest to Synapse. This command automatically validates
+the manifest as part of the submission process, so if you prefer, you can skip the standalone validation step.
+
+.. note::
+
+   During manifest submission, the entityId column will be filled out if it's missing.
+
+.. code-block:: bash
+
+   schematic model -c /path/to/config.yml submit -mp <manifest_path> -d <dataset_id> -vc <validate_component> -mrt file_only
+
+This command will:
+
+- Validate your manifest.
+- If validation is successful, submit it to the specified "Top Level Folder" (see important terminology) in Synapse.
diff --git a/poetry.lock b/poetry.lock
index 0180edc00..de83a1dda 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -647,18 +647,18 @@ test = ["pytest"]
 
 [[package]]
 name = "connexion"
-version = "2.14.1"
+version = "2.14.2"
 description = "Connexion - API first applications with OpenAPI/Swagger and Flask"
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "connexion-2.14.1-py2.py3-none-any.whl", hash = "sha256:f343717241b4c4802a694c38fee66fb1693c897fe4ea5a957fa9b3b07caf6394"},
-    {file = "connexion-2.14.1.tar.gz", hash = "sha256:99aa5781e70a7b94f8ffae8cf89f309d49cdb811bbd65a8e2f2546f3b19a01e6"},
+    {file = "connexion-2.14.2-py2.py3-none-any.whl", hash = "sha256:a73b96a0e07b16979a42cde7c7e26afe8548099e352cf350f80c57185e0e0b36"},
+    {file = "connexion-2.14.2.tar.gz", hash = "sha256:dbc06f52ebeebcf045c9904d570f24377e8bbd5a6521caef15a06f634cf85646"},
 ]
 
 [package.dependencies]
 clickclick = ">=1.2,<21"
-flask = ">=1.0.4,<3"
+flask = ">=1.0.4,<2.3"
 inflection = ">=0.3.1,<0.6"
 itsdangerous = ">=0.24"
 jsonschema = ">=2.5.1,<5"
@@ -666,14 +666,14 @@ packaging = ">=20"
 PyYAML = ">=5.1,<7"
 requests = ">=2.9.1,<3"
 swagger-ui-bundle = {version = ">=0.0.2,<0.1", optional = true, markers = "extra == \"swagger-ui\""}
-werkzeug = ">=1.0,<3"
+werkzeug = ">=1.0,<2.3"
 
 [package.extras]
 aiohttp = ["MarkupSafe (>=0.23)", "aiohttp (>=2.3.10,<4)", "aiohttp-jinja2 (>=0.14.0,<2)"]
 docs = ["sphinx-autoapi (==1.8.1)"]
-flask = ["flask (>=1.0.4,<3)", "itsdangerous (>=0.24)"]
+flask = ["flask (>=1.0.4,<2.3)", "itsdangerous (>=0.24)"]
 swagger-ui = ["swagger-ui-bundle (>=0.0.2,<0.1)"]
-tests = ["MarkupSafe (>=0.23)", "aiohttp (>=2.3.10,<4)", "aiohttp-jinja2 (>=0.14.0,<2)", "aiohttp-remotes", "decorator (>=5,<6)", "flask (>=1.0.4,<3)", "itsdangerous (>=0.24)", "pytest (>=6,<7)", "pytest-aiohttp", "pytest-cov (>=2,<3)", "swagger-ui-bundle (>=0.0.2,<0.1)", "testfixtures (>=6,<7)"]
+tests = ["MarkupSafe (>=0.23)", 
"aiohttp (>=2.3.10,<4)", "aiohttp-jinja2 (>=0.14.0,<2)", "aiohttp-remotes", "decorator (>=5,<6)", "flask (>=1.0.4,<2.3)", "itsdangerous (>=0.24)", "pytest (>=6,<7)", "pytest-aiohttp", "pytest-cov (>=2,<3)", "swagger-ui-bundle (>=0.0.2,<0.1)", "testfixtures (>=6,<7)"] [[package]] name = "coverage" @@ -4267,6 +4267,25 @@ click = ">=7.0" docutils = "*" sphinx = ">=2.0" +[[package]] +name = "sphinx-rtd-theme" +version = "3.0.1" +description = "Read the Docs theme for Sphinx" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sphinx_rtd_theme-3.0.1-py2.py3-none-any.whl", hash = "sha256:921c0ece75e90633ee876bd7b148cfaad136b481907ad154ac3669b6fc957916"}, + {file = "sphinx_rtd_theme-3.0.1.tar.gz", hash = "sha256:a4c5745d1b06dfcb80b7704fe532eb765b44065a8fad9851e4258c8804140703"}, +] + +[package.dependencies] +docutils = ">0.18,<0.22" +sphinx = ">=6,<9" +sphinxcontrib-jquery = ">=4,<5" + +[package.extras] +dev = ["bump2version", "transifex-client", "twine", "wheel"] + [[package]] name = "sphinxcontrib-applehelp" version = "2.0.0" @@ -4315,6 +4334,20 @@ lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] standalone = ["Sphinx (>=5)"] test = ["html5lib", "pytest"] +[[package]] +name = "sphinxcontrib-jquery" +version = "4.1" +description = "Extension to include jQuery on newer Sphinx releases" +optional = false +python-versions = ">=2.7" +files = [ + {file = "sphinxcontrib-jquery-4.1.tar.gz", hash = "sha256:1620739f04e36a2c779f1a131a2dfd49b2fd07351bf1968ced074365933abc7a"}, + {file = "sphinxcontrib_jquery-4.1-py2.py3-none-any.whl", hash = "sha256:f936030d7d0147dd026a4f2b5a57343d233f1fc7b363f68b3d4f1cb0993878ae"}, +] + +[package.dependencies] +Sphinx = ">=1.8" + [[package]] name = "sphinxcontrib-jsmath" version = "1.0.1" @@ -4772,20 +4805,20 @@ test = ["websockets"] [[package]] name = "werkzeug" -version = "2.3.8" +version = "2.2.3" description = "The comprehensive WSGI web application library." optional = false python-versions = ">=3.8" files = [ - {file = "werkzeug-2.3.8-py3-none-any.whl", hash = "sha256:bba1f19f8ec89d4d607a3bd62f1904bd2e609472d93cd85e9d4e178f472c3748"}, - {file = "werkzeug-2.3.8.tar.gz", hash = "sha256:554b257c74bbeb7a0d254160a4f8ffe185243f52a52035060b761ca62d977f03"}, + {file = "Werkzeug-2.2.3-py3-none-any.whl", hash = "sha256:56433961bc1f12533306c624f3be5e744389ac61d722175d543e1751285da612"}, + {file = "Werkzeug-2.2.3.tar.gz", hash = "sha256:2e1ccc9417d4da358b9de6f174e3ac094391ea1d4fbef2d667865d819dfd0afe"}, ] [package.dependencies] MarkupSafe = ">=2.1.1" [package.extras] -watchdog = ["watchdog (>=2.3)"] +watchdog = ["watchdog"] [[package]] name = "widgetsnbextension" diff --git a/pyproject.toml b/pyproject.toml index 4a75a7ad3..44d300bda 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,6 +98,7 @@ pre-commit = "^3.6.2" [tool.poetry.group.doc.dependencies] pdoc = "^14.0.0" +sphinx-rtd-theme = "3.0.1" sphinx = "7.3.7" sphinx-click = "4.4.0"